From 99e06e09894bda7401503d7cd618c0fe7124596d Mon Sep 17 00:00:00 2001 From: Anna Lungarska <lungarska@gmail.com> Date: Mon, 6 May 2024 14:57:49 +0200 Subject: [PATCH] version publique du code --- README.md | 100 +++---- src/00.connexion_sql.R | 16 ++ src/01.occsol.py | 81 ++++++ src/02.BNVd.R | 83 ++++++ src/03.Calc_coef.R | 524 ++++++++++++++++++++++++++++++++++++ src/04.BNVd_corrige_cp.R | 195 ++++++++++++++ src/05.Precalcul_com_adm.py | 164 +++++++++++ 7 files changed, 1097 insertions(+), 66 deletions(-) create mode 100644 src/00.connexion_sql.R create mode 100644 src/01.occsol.py create mode 100644 src/02.BNVd.R create mode 100644 src/03.Calc_coef.R create mode 100644 src/04.BNVd_corrige_cp.R create mode 100644 src/05.Precalcul_com_adm.py diff --git a/README.md b/README.md index f04aad6..3968e65 100644 --- a/README.md +++ b/README.md @@ -1,93 +1,61 @@ -# obnvds-publique +# oBNVDs publique +## Introduction +This is a restricted (no identifications) version of the codes that produce the open spatialization of the French pesticides sales data ([BNV-D](https://ventes-produits-phytopharmaceutiques.eaufrance.fr/)) following the procedure described in [Ramalanjaona et al. (2020)](https://odr.inrae.fr/intranet/carto_joomla/files/Note_methodo_calculcoeff_vf.pdf). -## Getting started +## Data used -To make it easy for you to get started with GitLab, here's a list of recommended next steps. +The spatialization builds on these data: -Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)! +- BNV-D +- Hexaposte +- RPG +- RPG complété +- BD TOPO +- Statisques agricoles annuelles +- E-Phy -## Add your files +## Software -- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files -- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command: +The calculation are made using **PostgreSQL** and its spatial extension **PostGIS**. Orchestration and other operations are done via **R** and **python**. -``` -cd existing_repo -git remote add origin https://forgemia.inra.fr/anna.lungarska/obnvds-publique.git -git branch -M main -git push -uf origin main -``` - -## Integrate with your tools - -- [ ] [Set up project integrations](https://forgemia.inra.fr/anna.lungarska/obnvds-publique/-/settings/integrations) - -## Collaborate with your team - -- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/) -- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html) -- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically) -- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/) -- [ ] [Set auto-merge](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html) - -## Test and Deploy - -Use the built-in continuous integration in GitLab. - -- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html) -- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/) -- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html) -- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/) -- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html) +## Description -*** +The code here -# Editing this README +## Installation -When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thanks to [makeareadme.com](https://www.makeareadme.com/) for this template. +No particular installation for these scripts, a **PostrgreSQL** server should be set up and the **PostGIS** extension is necessary if spatial information is to be manipulated. For **R** the packages are: `tidyverse, RPostgreSQL, here, readxl` and some features work with the **R Studio** software. For **python** the packages are: `configparser, datetime, psycopg2`. -## Suggestions for a good README +## Usage -Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information. +The spatialization of the BNV-D data allows the attribution of phytopharmaceutical products and substances quantities at the plot level. This scale is useful for intersecting with other spatial units and further reagregation. The results are not intended to be interpreted at the plot level, they are meaningless at this scale. -## Name -Choose a self-explaining name for your project. +## Support -## Description -Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors. +We do not provide support for the code except for common projects. Address any queries to US ODR, INRAE, [bnvd-spatialisation@inrae.fr](mailto:bnvd-spatialisation@inrae.fr) . -## Badges -On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge. +## Roadmap -## Visuals -Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method. +Future developments are to come. -## Installation -Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection. +## Contributing -## Usage -Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README. +We are accepting feedback on the code. -## Support -Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc. +## Authors and acknowledgment -## Roadmap -If you have ideas for releases in the future, it is a good idea to list them in the README. +The code is written by Anna Lungarska, Lovasoa Ramalanjaona and is property of INRAE, France. It builds on the work of Camille Truche, Claire Séard, Eva Groshens, Marie Carles, Pascal Filippi, and Olivier Lision under the supervision by Eric Cahuzac and Thomas Poméon. It has also been made possible by the work of Pierre Cantelaube and Benjamin Lardot. -## Contributing -State if you are open to contributions and what your requirements are for accepting them. +## License -For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self. +Distributed under GNU AGPLv3 license. -You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser. +## References -## Authors and acknowledgment -Show your appreciation to those who have contributed to the project. +Cantelaube P. et Lardot B., (2021) [Construction d’une base de données géographiques à échelle fine exhaustive sur l’occupation agricole du sol : Le « RPG complété ». Partie 2 : Attribution des cultures aux parcelles susceptibles d’accueillir les surfaces agricoles hors RPG](https://hal.inrae.fr/hal-03818008). Version 1. Note Méthodologue US ODR, INRAE, février 2021, <https://hal.inrae.fr/hal-03818008> -## License -For open source projects, say how it is licensed. +Lardot B., Cantelaube P., Carles M., Séard C., Truche C. et Poméon T., (2021). [Construction d’une base de données géographiques exhaustive à échelle fine sur l’occupation agricole du sol : Le « RPG complété ». Partie 1 : Production de la couche géographique des parcelles susceptibles d’accueillir les surfaces agricoles hors RPG](https://hal.inrae.fr/hal-03647500). Version 1. Note Méthodologique US ODR, INRAE, février 2021, <https://hal.inrae.fr/hal-03647500> -## Project status -If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers. +Ramalanjaona L., Poméon T., Ballot R., Barbu C., Bougon N., Fernandez F., Martin P., Mohamed H. (2020). [Mise à jour du calcul des coefficients de répartition spatiale des données de la BNVd](https://hal.inrae.fr/hal-03594944/). Note méthodologique ODR, 2020. <https://hal.inrae.fr/hal-03594944/> diff --git a/src/00.connexion_sql.R b/src/00.connexion_sql.R new file mode 100644 index 0000000..0ec6d2c --- /dev/null +++ b/src/00.connexion_sql.R @@ -0,0 +1,16 @@ +### Connection to the psql database +drv = dbDriver("PostgreSQL") + +username = rstudioapi::showPrompt("Database user", + "Username", + default = "") + +host_ip = '' +db_atelier_name = '' +psql_port = 5432 #readline("Port ? ") + +con_atelier = dbConnect(drv, host=host_ip, port=psql_port, dbname = db_atelier_name, + user = username, password= rstudioapi::askForPassword(prompt = "Password")) + +# optionnel, si plusieurs a travailler sur le meme projet +dbSendQuery(con_atelier, "SET ROLE gr_db_atelier_ecophyto;") \ No newline at end of file diff --git a/src/01.occsol.py b/src/01.occsol.py new file mode 100644 index 0000000..ce04887 --- /dev/null +++ b/src/01.occsol.py @@ -0,0 +1,81 @@ +import configparser + +config = configparser.ConfigParser() +config.read("config.ini") + +usern = config['credentials']['username'] +pw = config['credentials']['password'] +dbase = "" # working database +dbase_rpg = "" # database source if any + +import datetime +import psycopg2 +# Connect to an existing database + +conn = psycopg2.connect( + host="localhost", + database=dbase, + user=usern, + password=pw) + +# Open a cursor to perform database operations +cur = conn.cursor() + +annees = [2015, 2016, 2017, 2018, 2019, 2020] + +for a in annees: + + annee = str(a) + print(annee) + + cur.execute(""" + SET ROLE gr_db_atelier_ecophyto; + """) + + cur.execute(""" + SELECT schema_name FROM information_schema.schemata WHERE schema_name = 'obnvds_""" + annee + """'; + """) + + schema_ex = cur.fetchall() + + if (len(schema_ex) == 0): + cur.execute(""" + CREATE SCHEMA IF NOT EXISTS obnvds_""" + annee + """ AUTHORIZATION gr_db_atelier_ecophyto; + """) + + now = datetime.datetime.now() + + cur.execute(""" + COMMENT ON SCHEMA obnvds_""" + annee + """ IS + 'Schema created on """ + now.strftime("%m/%d/%Y, %H:%M:%S") + """ by """ + usern + """'; + """) + + # Creer la table obnvds_[annee].parcelles avec les attributs suivants : + # ilot : identifiant unique de la parcelle + # geom : geometrie des polygones des parcelles + # com_adm : code commune INSEE attribue a la parcelle + # cp_adm : code postal attribue a la parcelle + # occsol : groupe de culture/zna + # surf_ha : surface en hectare, calcul a partir de la geometrie + cur.execute("""CREATE TABLE obnvds_""" + annee + """.parcelles AS + + ; + """) + + now = datetime.datetime.now() + + cur.execute("""COMMENT ON TABLE obnvds_""" + annee + """.parcelles + IS 'creation : """ + now.strftime("%m/%d/%Y, %H:%M:%S") + """ by """ + usern + """ via le script 01.occsol.py'; + """) + + + cur.execute("""ALTER TABLE obnvds_""" + annee + """.parcelles ADD CONSTRAINT parcelles_pk PRIMARY KEY (ilot);""") + cur.execute("""CREATE INDEX pc_geom_idx ON obnvds_""" + annee + """.parcelles USING gist (geom);""") + cur.execute("""CREATE INDEX cp_occs_idx ON obnvds_""" + annee + """.parcelles USING btree (cp_adm, occsol);""") + + conn.commit() + + +#Close communication with the database +cur.close() +conn.close() diff --git a/src/02.BNVd.R b/src/02.BNVd.R new file mode 100644 index 0000000..2228486 --- /dev/null +++ b/src/02.BNVd.R @@ -0,0 +1,83 @@ +### R for RStudio*, utf8 +### 20/01/2022 ALungarska +### * the .rs.api.askForPassword() function called is RStudio dependent + +### This script gets the data on phyto sales, treats it and puts them into the +### BNVDs interface development app. + +#### Parametres annuels #### +annees = paste(2015:2020) + +#### Fonctions et packages #### +library(tidyverse) +library(RPostgreSQL) +library(here) + +`%ni%` = Negate(`%in%`) + +selfName = "obnvds/src/02.BNVd.R" + +read.bnvd = function(x, colcl){ + + return( + read.delim(x, colClasses = colcl, + sep = ";", na.strings = c('#N/A'), header = T) + ) + +} + +#### Connection to the psql database #### +source("src/00.connexion_sql.R") + +#### Lecture fichiers BNVd #### + +bnvd_sa_path = "BNVD_TRACABILITE_20231030_133113_ACHAT_/" +bnvd_amm_path = "BNVD_TRACABILITE_20231030_150046_ACHAT_/" + +cols_amm = c(rep("NULL", 5), "character", "NULL", + "character", "numeric", "character", "character", + rep("NULL", 7)) + +cols_sa = c(rep("NULL", 5), "character", "NULL", + "character", rep("NULL", 3), + rep("character", 6), "numeric") + +for (annee in annees){ + + print(annee) + + bdd_target = paste0("obnvds_", annee) + + #### BNV-D par AMM #### + bnvd_amm_file = list.files(bnvd_amm_path, full.names = T, pattern = paste0(annee, ".csv")) + + bnvd_amm = lapply(bnvd_amm_file, function(x){read.bnvd(x, colcl = cols_amm)}) + bnvd_amm = do.call(rbind, bnvd_amm) + + dbWriteTable(con_atelier, name = c(bdd_target, "bnvd_amm"), + value = bnvd_amm, row.names = F, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".bnvd_amm IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_atelier, .) + + #### Optimisation table vente #### + dbSendQuery(con_atelier, paste0('CREATE INDEX cp_amm_idx ON ', bdd_target,'.bnvd_amm (code_postal_acheteur, amm);')) + + #### BNV-D par SA #### + bnvd_sa_file = list.files(bnvd_sa_path, full.names = T, pattern = paste0(annee, ".csv")) + + bnvd_sa = lapply(bnvd_sa_file, function(x){read.bnvd(x, colcl = cols_sa)}) + bnvd_sa = do.call(rbind, bnvd_sa) + + dbWriteTable(con_atelier, name = c(bdd_target, "bnvd_sa"), + value = bnvd_sa, row.names = F, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".bnvd_sa IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_atelier, .) + + #### Optimisation table vente #### + dbSendQuery(con_atelier, paste0('CREATE INDEX cp_amm_sa_idx ON ', bdd_target,'.bnvd_sa (code_postal_acheteur, amm);')) + +} diff --git a/src/03.Calc_coef.R b/src/03.Calc_coef.R new file mode 100644 index 0000000..462564b --- /dev/null +++ b/src/03.Calc_coef.R @@ -0,0 +1,524 @@ +### R for RStudio*, utf8 +### 30/10/2023 ALungarska + +### This script calculates the coeficients for all combinations of CP, amm, and occsol +### BNVDs interface development app. + +#### Parametres annuels #### +annees = paste(2015:2020) +ephy = "ephy_v6" + +#### Fonctions et packages #### +library(tidyverse) +library(RPostgreSQL) +library(here) +library(readxl) + +`%ni%` = Negate(`%in%`) + +selfName = "obnvds/src/03.Calc_coef.R" + +#### Connection to the psql database #### +source("src/00.connexion_sql.R") +con_rpsql = con_atelier + + +for (annee in annees){ + + timestamp() + print(annee) + + bdd_target = paste0("obnvds_", annee) + + ######################selection amm dans la base ephy########################## + #amm dont les doses initiales dans ephy sont inconnues ou non spatialiables. + #On leur attribue la dose de 1kg/ha + + ### amm et unite dans le registre + table_registre_bnvd = paste0("SELECT DISTINCT amm, conditionnement, eaj + FROM ", bdd_target, ".bnvd_amm;") %>% + dbGetQuery(con_rpsql, .) + + #Verification si table existe + if(dbExistsTable(con_rpsql, name = c(bdd_target, "amm_cult_dhom_ephy_initial"))){ + + dbRemoveTable(con_rpsql, name = c(bdd_target, "amm_cult_dhom_ephy_initial")) + + } + + dbSendQuery(con_rpsql, paste0("CREATE TABLE ", bdd_target, ".amm_cult_dhom_ephy_initial AS + SELECT amm, unaccent_string(split_part(intitule,'*',1)) culture, + dose_spatialisable AS dhom, unite_spatialisable AS unite + FROM base_amm.", ephy, " + WHERE dose_spatialisable IS NOT NULL + AND split_part(intitule,'*',1) != 'Porte graine' + ORDER BY amm, culture;")) + + ## rajoute les amm non spatialisées avec de dose=1 kg/ha + amm_ns = dbGetQuery(con_rpsql, paste0("SELECT DISTINCT amm, unite_spatialisable + FROM base_amm.", ephy, + " WHERE amm IN ( + SELECT DISTINCT amm + FROM base_amm.", ephy, " + WHERE dose_spatialisable IS NULL + AND split_part(intitule,'*',1) != 'Porte graine' + );")) %>% + group_by(amm) %>% + tally() %>% + filter(n == 1) %>% + inner_join(table_registre_bnvd %>% + select(-eaj), by = "amm") %>% + mutate(dhom = 1, + unite = ifelse(conditionnement == "kg", "kg/ha", "L/ha")) %>% + select(amm, dhom, unite) + + amm_ns_add = dbGetQuery(con_rpsql, paste0("SELECT amm, unaccent_string(split_part(intitule,'*',1)) culture + FROM base_amm.", ephy, " + WHERE amm IN ( + '", paste(amm_ns$amm, collapse = "', '"), "' + ) AND split_part(intitule,'*',1) != 'Porte graine' + ORDER BY amm, culture;")) %>% + inner_join(amm_ns, by = "amm") + + dbWriteTable(con_rpsql, name = c(bdd_target, "amm_cult_dhom_ephy_initial"), + value = amm_ns_add, append = T, row.names = F) + + ###########selection des amm de la base ephy qui sont aussi dans le registre############ + + amm_cult_dhom_ephy_select = dbGetQuery(con_rpsql, paste0("SELECT * FROM ", + bdd_target, ".amm_cult_dhom_ephy_initial;")) + amm_cult_dhom_ephy_select = amm_cult_dhom_ephy_select[ + which(amm_cult_dhom_ephy_select$amm %in% unique(table_registre_bnvd$amm)), ] + + ## on enleve les amm dont les usages sont strictement du traitement post recolte: + ephy_table = dbGetQuery(con_rpsql, paste0("SELECT DISTINCT amm, split_part(intitule,'*',2) usage + FROM base_amm.", ephy, " + WHERE amm IN ( + SELECT DISTINCT amm + FROM base_amm.", ephy, " + WHERE intitule LIKE '%Trt Prod%' + ) GROUP BY amm, usage + ORDER BY amm;")) + + ephy_table = aggregate(usage ~ amm, ephy_table, unique) + + produit_post_rec = amm_cult_dhom_ephy_select[ + which(amm_cult_dhom_ephy_select$amm %in% + ephy_table$amm[which(ephy_table$usage == "Trt Prod. Réc.")]), ] + + amm_cult_dhom_ephy_select = amm_cult_dhom_ephy_select[ + -which(amm_cult_dhom_ephy_select$amm %in% + unique(ephy_table$amm[which(ephy_table$usage == "Trt Prod. Réc.")])), ] + + ## on enleve les amm dont les usages sont strictement du trt généraux pour de la désinfection de matériel, etc: + ephy_table = dbGetQuery(con_rpsql, paste0("SELECT DISTINCT amm, split_part(intitule, '*', 2) usage + FROM base_amm.", ephy, " + WHERE amm IN ( + SELECT DISTINCT amm + FROM base_amm.", ephy, " + WHERE intitule LIKE 'Traitement%' + ) GROUP BY amm, usage + ORDER BY amm;")) + + ephy_table = aggregate(usage ~ amm, ephy_table, unique) + + produit_post_rec = rbind(produit_post_rec, amm_cult_dhom_ephy_select[ + which(amm_cult_dhom_ephy_select$amm %in% + ephy_table$amm[which(ephy_table$usage %in% c("Désinsectisation", + "Fumigation (désinsectisation)", + "Désinfection"))]), ]) + + amm_cult_dhom_ephy_select = amm_cult_dhom_ephy_select[ + -which(amm_cult_dhom_ephy_select$amm %in% + unique(ephy_table$amm[which(ephy_table$usage %in% c("Désinsectisation", + "Fumigation (désinsectisation)", + "Désinfection"))])), ] + + dbWriteTable(con_rpsql, c(bdd_target, "produit_stockage_exclu"), + value = produit_post_rec, + append = FALSE, row.names = FALSE, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".produit_stockage_exclu IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_rpsql, .) + + ################# dose mediane par amm, culture, unite ######################## + + amm_cult_dhmed = aggregate(dhom ~ amm + culture + unite, amm_cult_dhom_ephy_select, median) + + colnames(amm_cult_dhmed) = sub("dhom", "dhmed", colnames(amm_cult_dhmed)) + + dbWriteTable(con_rpsql, c(bdd_target, "amm_cult_dhmed"), + value = amm_cult_dhmed, + append = FALSE, row.names = FALSE, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".amm_cult_dhmed IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_rpsql, .) + + ######### on recupere les surfaces des cultures de saa ########################## + + saa_region = dbGetQuery(con_rpsql, paste0("SELECT substring(region from 3 for 4) region, + n306_lib AS cult_saa, origine, SUM(valeur) surface_ha + FROM agreste.saa_2010_2021 + WHERE annref = '", annee, "' + GROUP BY region, cult_saa, origine;")) + + saa_region = saa_region[-which(saa_region$surface_ha == 0), ] + + ################## doses moyennes, ponderees par surface ####################### + + + ############################### calc_int ####################################### + conv_ephy_saa = dbGetQuery(con_rpsql, "SELECT unaccent_string(cult_ephy) cult_ephy, + cult_saa, origine_saa, code_gp + FROM donnees_references.conv_ephy6_saa_2010_2021 + WHERE code_gp NOT LIKE 'PP' + AND code_gp NOT LIKE 'STH' + AND code_gp NOT LIKE 'PT' + AND code_gp NOT LIKE 'GEL';") + + + amm_dhmed_cultephy_saa = left_join(amm_cult_dhmed, conv_ephy_saa, + by = c("culture" = "cult_ephy"), + relationship = "many-to-many") + + amm_dhmed_cult_surf_saa = left_join(amm_dhmed_cultephy_saa, saa_region, + by = c("cult_saa" = "cult_saa"), + relationship = "many-to-many") + + calc_int = aggregate(surface_ha ~ amm + region + culture + code_gp + unite + dhmed, + amm_dhmed_cult_surf_saa, sum) + + calc_int$sommeprod = calc_int$surface_ha * calc_int$dhmed + colnames(calc_int)[7] = "surf_cult" + colnames(calc_int)[4] = "occsol" + + ############################# dhref ########################################### + + amm_occsol_dhref = calc_int %>% + group_by(amm, region, occsol, unite) %>% + summarise(sum_sommeprod = sum(sommeprod, na.rm = T), + sum_surf_cult = sum(surf_cult), + .groups = "drop") + + amm_occsol_dhref$dhref = amm_occsol_dhref$sum_sommeprod / amm_occsol_dhref$sum_surf_cult + amm_occsol_dhref = amm_occsol_dhref[, -c(5, 6)] + + dbWriteTable(con_rpsql, c(bdd_target, "amm_occsol_dhref"), + value = amm_occsol_dhref, + append = FALSE, row.names = FALSE, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".amm_occsol_dhref IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_rpsql, .) + + dbSendQuery(con_rpsql, paste0("CREATE INDEX i_amm2 ON ", bdd_target, ".amm_occsol_dhref(amm);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX i_unit2 ON ", bdd_target, ".amm_occsol_dhref(unite);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX i_occsol2 ON ", bdd_target, ".amm_occsol_dhref(occsol);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX i_dhref2 ON ", bdd_target, ".amm_occsol_dhref(dhref);")) + + ############################################################################################### + ####################################### TG #################################################### + ############################################################################################### + + ############################## APPLICATION AGRICOLE ########################################### + + #insertion de csv des dhref et TG pour application agricole + # amm_occsol_dhref = dbGetQuery(con_rpsql, paste0("SELECT * FROM ", bdd_target, ".amm_occsol_dhref;")) + + #Verification si table existe + if(dbExistsTable(con_rpsql, name = c(bdd_target, "amm_traitementgeneraux_agri"))){ + + dbRemoveTable(con_rpsql, name = c(bdd_target, "amm_traitementgeneraux_agri")) + + } + + dbSendQuery(con_rpsql, paste0("CREATE TABLE ", bdd_target, ".amm_traitementgeneraux_agri AS + SELECT DISTINCT amm FROM base_amm.", ephy, " + WHERE LEFT(split_part(intitule, '*', 1), 11) = 'Traitements' + AND (gamme_usages = 'Professionnel' OR gamme_usages IS NULL) + ORDER BY amm;")) + + #Verification si table existe + if(dbExistsTable(con_rpsql, name = c(bdd_target, "dhmed_traitementsgeneraux"))){ + + dbRemoveTable(con_rpsql, name = c(bdd_target, "dhmed_traitementsgeneraux")) + + } + + dbSendQuery(con_rpsql, paste0("CREATE TABLE ", bdd_target, ".dhmed_traitementsgeneraux AS + SELECT * FROM ", bdd_target, ".amm_cult_dhmed + WHERE amm IN ( + SELECT amm FROM ", bdd_target, ".amm_traitementgeneraux_agri + ) AND culture LIKE '%raitements%' + AND amm NOT IN ( + SELECT DISTINCT amm FROM ", bdd_target, ".produit_stockage_exclu + ) ORDER BY 1;")) + + dhmed_traitementsgeneraux = dbGetQuery(con_rpsql, + paste0("SELECT * FROM ", bdd_target, ".dhmed_traitementsgeneraux;")) + + + #creation des vecteurs pour les classes d'occupation du sol (creees par camille) + regions + vect_occsol = c("BETT", "BLE", "CEREALES", "COLZA", "COQUE", "DIVERS", "FEV", + "FIBRE", "FOURR", "INDUS", "LEGU", "LFOURR", "LGRAIN", "MAIS", + "OLEA", "OLIVE", "ORGE", "PDT", "POIS", "PROTEA", "RIZ", "SOJA", + "TOUR", "VERGERS", "VIGNES") + + if (annee == 2021){ + + vect_reg = c("11", "24", "27", "28", "32", "44", "52", + "53", "75", "76", "84", "93", "94") + + } else { + vect_reg = c("01", "02", "03", "04", "06", "11", "24", "27", "28", + "32", "44", "52", "53", "75", "76", "84", "93", "94") + #vect_reg<-paste("R",vect_reg,sep="") + } + + # application des produits en TG sur les differentes occupations du sol, pour toutes les regions. sortie: amm_occsol_dhref_tg + amm = dhmed_traitementsgeneraux$amm + unite = dhmed_traitementsgeneraux$unite + dhmed = dhmed_traitementsgeneraux$dhmed + + tab_amm = data.frame(amm = amm[gl(length(amm), length(vect_occsol) * length(vect_reg))], + unite = unite[gl(length(unite), length(vect_occsol) * length(vect_reg))], + dhref = dhmed[gl(length(dhmed), length(vect_occsol) * length(vect_reg))]) + + region = rep(vect_reg[gl(length(vect_reg), length(vect_occsol))], times = length(amm)) + occsol = rep(vect_occsol, length(amm) * length(vect_reg)) + amm_occsol_dhref_tg = cbind(tab_amm, region, occsol) + + + #on regroupe les deux tableaux pour pouvoir faire une moyenne des dhref pour chaque amm+region+occsol + amm_occsol_dhref_tg = rbind(amm_occsol_dhref, amm_occsol_dhref_tg) + amm_occsol_dhref_2 = aggregate(dhref ~ amm + region + occsol + unite, amm_occsol_dhref_tg, mean) + + + ############### APPLICATION NON AGRICOLE - professionnel ###################### + + amm_cult_dhmed = dbGetQuery(con_rpsql, paste0("SELECT * FROM ", bdd_target, ".amm_cult_dhmed;")) + + amm_zna_pro = amm_cult_dhmed[which(amm_cult_dhmed$culture %in% c("Traitements generaux", + "Usages non agricoles", + "JEVI")), ] + + amm_zna_pro_mean = as.data.frame(aggregate(dhmed ~ amm + unite, amm_zna_pro, mean)) + + amm_zna_pro_dhref = amm_zna_pro_mean[rep(1:nrow(amm_zna_pro_mean), length(vect_reg)), ] + amm_zna_pro_dhref[, "region"] = vect_reg[gl(length(vect_reg), length(unique(amm_zna_pro_mean$amm)))] + amm_zna_pro_dhref[, "occsol"] = "zna_pro" + colnames(amm_zna_pro_dhref) = sub("dhmed", "dhref", colnames(amm_zna_pro_dhref)) + + #on rajoute au tableau contenant les dhref des cultures les dhref des zna_pro + amm_occsol_dhref_2 = rbind(amm_occsol_dhref_2, amm_zna_pro_dhref) + + ################ APPLICATION NON AGRICOLES - non professionnel ################# + + amm_ZNA_non_pro_dhmed = dbGetQuery(con_rpsql, paste0("SELECT * FROM ", bdd_target, ".amm_cult_dhmed + WHERE (culture LIKE 'Jardin%' + OR culture LIKE 'Trai%' + OR culture LIKE 'Usa%');")) + # OR culture LIKE 'JEVI%');")) # Issue #2 GIT + + amm_ZNA_non_pro_dhmed = amm_ZNA_non_pro_dhmed[ + which(amm_ZNA_non_pro_dhmed$amm %in% + unique(table_registre_bnvd$amm[which(table_registre_bnvd$eaj == "Oui")])), ] + + amm_ZNA_non_pro_dhref = aggregate(dhmed ~ amm + unite, amm_ZNA_non_pro_dhmed, mean) + + amm_ZNA_non_pro_dhref[, 'occsol'] = "zna_non_pro" + amm_ZNA_non_pro_dhref = amm_ZNA_non_pro_dhref[rep(1:nrow(amm_ZNA_non_pro_dhref), length(vect_reg)), ] + amm_ZNA_non_pro_dhref[, "region"] = vect_reg[gl(length(vect_reg), length(unique(amm_ZNA_non_pro_dhmed$amm)))] + colnames(amm_ZNA_non_pro_dhref) = sub("dhmed", "dhref", colnames(amm_ZNA_non_pro_dhref)) + + #on rajoute au tableau contenant les dhref des cultures les dhref des zna_non_pro + amm_occsol_dhref_2 = rbind(amm_occsol_dhref_2, amm_ZNA_non_pro_dhref) + #amm_occsol_dhref_2$region<-substr(amm_occsol_dhref_2$region,2,3) + + dbWriteTable(con_rpsql, c(bdd_target, "amm_occsol_dhref_2"), + value = amm_occsol_dhref_2, + append = FALSE, row.names = FALSE, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".amm_occsol_dhref_2 IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_rpsql, .) + + dbSendQuery(con_rpsql, paste0("CREATE INDEX dhref_amm ON ", bdd_target, ".amm_occsol_dhref_2(amm);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX dhref_unit ON ", bdd_target, ".amm_occsol_dhref_2(unite);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX dhref_occsol ON ", bdd_target, ".amm_occsol_dhref_2(occsol);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX dhref_dhref ON ", bdd_target, ".amm_occsol_dhref_2(dhref);")) + + rm(amm_ZNA_non_pro_dhref, amm_ZNA_non_pro_dhmed, amm_zna_pro_dhref, amm_occsol_dhref_tg, + calc_int, tab_amm,amm_occsol_dhref, amm_dhmed_cult_surf_saa, + amm_cult_dhmed, amm_cult_dhom_ephy_select) + + gc() + + ################################################################################################### + ########################### SURFACES PAR CP ET OCCUPATION DU SOL ################################## + ################################################################################################### + + #Verification si table existe + if(dbExistsTable(con_rpsql, name = c(bdd_target, "surf_cp"))){ + + dbRemoveTable(con_rpsql, name = c(bdd_target, "surf_cp")) + + } + + dbSendQuery(con_rpsql, paste0("CREATE TABLE ", bdd_target, ".surf_cp AS ( + SELECT region, cp_adm, occsol, + SUM(a.surf_ha) AS surf_gc_cp + FROM ", bdd_target, ".parcelles a + INNER JOIN ( + SELECT DISTINCT code_region region, + code_commune_insee com_adm + FROM donnees_references.com_dep_reg) regs + USING(com_adm) + GROUP BY region, cp_adm, occsol);")) + + dbSendQuery(con_rpsql, paste0("CREATE INDEX i_occsol ON ", bdd_target, ".surf_cp (occsol);")) + dbSendQuery(con_rpsql, paste0("CREATE INDEX i_cp ON ", bdd_target, ".surf_cp (cp_adm);")) + + #ajout d'un coef du fait qu'un cp puisse avoir deux regions + surf_cp = dbReadTable(con_rpsql, name = c(bdd_target, "surf_cp")) + surf_cp_all = surf_cp + + surf_cp = aggregate(surf_gc_cp ~ cp_adm + region, surf_cp, sum) + colnames(surf_cp)[3] = "surf_cp_reg" + surf_cp_1 = aggregate(surf_cp_reg ~ cp_adm, surf_cp,sum) + colnames(surf_cp_1)[2] = "surf_tot_cp" + surf_cp = left_join(surf_cp, surf_cp_1, by = "cp_adm") + surf_cp$coef = surf_cp$surf_cp_reg / surf_cp$surf_tot_cp + + dbWriteTable(con_rpsql, c(bdd_target, "surf_cp_b"), + value = surf_cp, append = FALSE, row.names = FALSE, overwrite = T) + + surf_cp_bireg = surf_cp %>% + filter(coef != 1) %>% + select(cp_adm, region) %>% + left_join(surf_cp_all, by = c("cp_adm", "region")) %>% + filter(surf_gc_cp > 0) %>% + left_join(amm_occsol_dhref_2, by = c("region", "occsol"), + relationship = "many-to-many") %>% + mutate(surf_dose = surf_gc_cp * dhref) %>% + group_by(cp_adm, region, amm, unite) %>% + summarise(surf_reg_cp_amm = sum(surf_dose, na.rm = T), .groups = "drop") + + surf_cp_bireg_1 = surf_cp_bireg %>% + group_by(amm, cp_adm) %>% + summarise(surf_tot_cp_amm = sum(surf_reg_cp_amm, na.rm = T), .groups = "drop") + + surf_cp_bireg = surf_cp_bireg %>% + left_join(surf_cp_bireg_1, by = c("cp_adm", "amm")) %>% + mutate(coef = surf_reg_cp_amm/surf_tot_cp_amm) %>% + filter(coef != 1) %>% + select(cp_adm, region, amm, unite, coef) + + dbWriteTable(con_rpsql, c(bdd_target, "surf_cp_bireg"), + value = surf_cp_bireg, append = FALSE, row.names = FALSE, overwrite = T) + + + ########### CALCUL DES COEF PAR CODE POSTAL, PAR AMM et PAR OCCSOL ############# + + timestamp() + print("calcul des coefs") + + #Verification si table existe + if(dbExistsTable(con_rpsql, name = c(bdd_target, "amm_surf_cp"))){ + + dbRemoveTable(con_rpsql, name = c(bdd_target, "amm_surf_cp")) + + } + + dbSendQuery(con_rpsql, + paste0("CREATE TABLE ", bdd_target, ".amm_surf_cp AS + SELECT region, occsol, amm, unite, + dhref, cp_adm, surf_gc_cp, + dhref*surf_gc_cp AS dhref_surf + FROM ", bdd_target, + ".amm_occsol_dhref_2 + INNER JOIN ", bdd_target, + ".surf_cp USING(region, occsol) + WHERE dhref IS NOT NULL + AND dhref > 0 + AND surf_gc_cp > 0;")) + + #Verification si table existe + if(dbExistsTable(con_rpsql, name = c(bdd_target, "coef_amm_cp"))){ + + dbRemoveTable(con_rpsql, name = c(bdd_target, "coef_amm_cp")) + + } + + dbSendQuery(con_rpsql, + paste0("CREATE TABLE ", bdd_target, ".coef_amm_cp AS ( + WITH a AS ( + SELECT SUM(dhref_surf) AS qph, + region, cp_adm, amm, unite + FROM ", bdd_target, ".amm_surf_cp + GROUP BY region, cp_adm, amm, unite + ) + SELECT cp_adm, region, amm, unite, occsol, surf_gc_cp, qph, + CASE WHEN scpb.coef IS NULL THEN (dhref/qph) + ELSE (dhref/qph)*scpb.coef END AS coef + FROM ", bdd_target, ".amm_surf_cp + INNER JOIN a USING(region, cp_adm, amm, unite) + LEFT JOIN ", bdd_target, ".surf_cp_bireg scpb USING(region, cp_adm, amm, unite) + );")) + + dbSendQuery(con_rpsql, paste0("ALTER TABLE ", bdd_target, ".coef_amm_cp + ADD CONSTRAINT pk_coef PRIMARY KEY (cp_adm, region, amm, unite, occsol);")) + + paste0("COMMENT ON TABLE ", bdd_target, ".coef_amm_cp IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_rpsql, .) + + #16/07/20: check-up occsol + occsol_coef = dbGetQuery(con_rpsql, paste0("SELECT DISTINCT occsol + FROM ", bdd_target, ".coef_amm_cp + WHERE occsol IN ( + SELECT DISTINCT occsol + FROM ", bdd_target, ".parcelles) + ORDER BY occsol;")) + + occsol_coef = occsol_coef$occsol + + for(i in 1:length(vect_occsol)){ + + if(isTRUE(vect_occsol[i] %ni% occsol_coef)){ + + print("error occsol_coef") + + } + + } + + + + somme_coef = dbGetQuery(con_rpsql, paste0("SELECT a.cp_adm, a.region, amm, unite, + SUM(coef*a.surf_gc_cp) AS somme1 + FROM ", bdd_target, ".coef_amm_cp a + JOIN ", bdd_target, ".surf_cp + USING(cp_adm, region, occsol) + GROUP BY a.cp_adm, a.region, amm, unite;")) + + somme_coef = left_join(somme_coef, surf_cp_bireg, by = c("cp_adm", "region", "amm", "unite")) %>% + mutate(somme = ifelse(is.na(coef), somme1, somme1/coef)) + + somme_coef = somme_coef[, c("cp_adm", "region", "amm", "unite", "somme")] + + dbWriteTable(con_rpsql, c(bdd_target, "somme_coef"), + value = somme_coef, append = FALSE, row.names = FALSE, overwrite = T) + + dbSendQuery(con_rpsql, paste0("DROP TABLE ", bdd_target, ".amm_cult_dhom_ephy_initial, + ", bdd_target, ".amm_traitementgeneraux_agri, + ", bdd_target, ".dhmed_traitementsgeneraux;")) + +} + +dbDisconnect(con_rpsql) + + + diff --git a/src/04.BNVd_corrige_cp.R b/src/04.BNVd_corrige_cp.R new file mode 100644 index 0000000..993f3bb --- /dev/null +++ b/src/04.BNVd_corrige_cp.R @@ -0,0 +1,195 @@ +### R for RStudio*, utf8 +### 30/11/2023 ALungarska +### * the .rs.api.askForPassword() function called is RStudio dependent + +### This script gets the data on phyto sales and changes some postalcodes +### in order to attribute them + + +#### Parametres annuels #### +annees = paste(2015:2020) + +#### Fonctions et packages #### +library(tidyverse) +library(RPostgreSQL) +library(here) + +`%ni%` = Negate(`%in%`) + +selfName = "obnvds/src/04.BNVd_corrige_cp.R" + +#### Connection to the psql database #### +source("src/00.connexion_sql.R") + + +### Donnees code postal -> code insee +# Tableau contenant les correspondances entre communes et codes postaux +cp_hexaposte_long = paste0("SELECT code_postal cp, code_insee insee + FROM test_lova.insee_cp_2021;") %>% + dbGetQuery(con_atelier, .) %>% + distinct() + +col_types = rep("character", 6) + +# Donnees de La Poste, https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/ +cp_hexasmal = here("data", "laposte_hexasmal.csv") %>% + read.delim(sep = ";", header = T, colClasses = col_types) %>% + select(insee = Code_commune_INSEE, cp = Code_postal) + +# Extraction de la base SIRENE pour les codes postaux des entreprises et les communes +cedex = here("data", "codes_postaux_cedex.csv") %>% + read.delim(header = T, sep = ";") %>% + filter(cp != "" & insee != "" & cp != "00000") %>% + add_row(cp_hexaposte_long) %>% + add_row(cp_hexasmal) %>% + distinct() + +for (annee in annees){ + + timestamp() + print(annee) + + bdd_target = paste0("obnvds_", annee) + + ### Recupere les cp_siege et les codes insee associes effectivement presents + cp_insee_occsol = paste0("SELECT DISTINCT cp_adm, com_adm insee + FROM ", bdd_target, ".parcelles;") %>% + dbGetQuery(con_atelier, .) + + ### BNVd filtre pour les cas ou le CP n'est pas parmi les CP siege de l'occsol + bnvd_amm = dbReadTable(con_atelier, c(bdd_target, "bnvd_amm")) + + bnvd_absent = bnvd_amm %>% + rename(cp = code_postal_acheteur, AMM = amm) %>% + filter(cp %ni% cp_insee_occsol$cp_adm) %>% + select(cp, AMM) %>% + distinct() + + bnvd_cedex = bnvd_absent %>% + left_join(cedex, by = "cp") %>% + left_join(cp_insee_occsol, by = "insee") %>% + select(-insee) %>% + distinct() %>% + filter(!is.na(cp_adm) & cp != "00000" & cp_adm != "00000") + + ### Empiriquement, on a un ou plusuiers cp_adm associes au cp inconnu de la bnvd + ### On dispatch la vente du cp_adm inconnu des .......... + ### au prorata selon les surfaces associées aux AMM du cp_adm + + surfaces = paste0("SELECT cp_adm, occsol, sum(surf_ha) surf_ha FROM ", bdd_target, + ".parcelles + WHERE cp_adm IN('", + paste(unique(bnvd_cedex$cp_adm), collapse = "', '"), + "') + GROUP BY cp_adm, occsol;") %>% + dbGetQuery(con_atelier, .) + + ### Donne les occsols associes a l'AMM + coef = paste0("SELECT distinct amm, occsol FROM ", bdd_target, + ".coef_amm_cp WHERE amm IN ('", + paste(unique(bnvd_cedex$AMM), collapse = "','"), + "');") %>% + dbGetQuery(con_atelier, .) + + ### Doses + doses = paste0("SELECT amm, occsol, AVG(dhref) dhref FROM ", + bdd_target, + ".amm_occsol_dhref_2 WHERE amm IN('", + paste(unique(bnvd_cedex$AMM), collapse = "','"), + "') + GROUP BY amm, occsol;") %>% + dbGetQuery(con_atelier, .) + + ### Calcul des quantites potentielles selon l'occsol + correctif = bnvd_cedex %>% + inner_join(coef, by = c("AMM" = "amm")) %>% + inner_join(surfaces, by = c("cp_adm", "occsol" = "occsol")) %>% + inner_join(doses, by = c("occsol", "AMM" = "amm")) %>% + mutate(potentiel = surf_ha*dhref) + + ### Somme des potentiel par cp et AMM + potsum = correctif %>% + group_by(cp, AMM) %>% + summarise(pot_sum = sum(potentiel), .groups = "drop") + + ### Somme des potentiel par cp, AMM et cp_siege et calcul du coefficient + correctif_coef = correctif %>% + group_by(cp, AMM, cp_adm) %>% + summarise(potentiel = sum(potentiel), .groups = "drop") %>% + inner_join(potsum, by = c("cp", "AMM")) %>% + mutate(coef_pond = potentiel/pot_sum) %>% + select(cp, AMM, cp_new = cp_adm, coef_pond) %>% + distinct() + + dbWriteTable(con_atelier, + c(bdd_target, "correctif_cp_coef"), + correctif_coef, + overwrite = T, + row.names = F) + + paste0("COMMENT ON TABLE ", bdd_target, ".correctif_cp_coef IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_atelier, .) + + + bnvd_amm_cor = bnvd_amm %>% + filter(amm != "nc") %>% + left_join(correctif_coef, by = c("code_postal_acheteur" = "cp", + "amm" = "AMM")) %>% + mutate(quantite_produit = ifelse(!is.na(cp_new) & !is.na(coef_pond), + quantite_produit*coef_pond, + quantite_produit), + code_postal_acheteur = ifelse(!is.na(cp_new), + cp_new, + code_postal_acheteur), + conditionnement = ifelse(conditionnement == "kg", + "kg/ha", "L/ha")) %>% + select(-coef_pond, -cp_new) + + + dbWriteTable(con_atelier, name = c(bdd_target, "bnvd_amm_cor"), + value = bnvd_amm_cor, row.names = F, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".bnvd_amm_cor IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_atelier, .) + + dbSendQuery(con_atelier, paste0('CREATE INDEX cp_amm_cor_idx ON ', + bdd_target, + '.bnvd_amm_cor (code_postal_acheteur, amm);')) + + + bnvd_sa = dbReadTable(con_atelier, c(bdd_target, "bnvd_sa")) + bnvd_sa_cor = bnvd_sa %>% + filter(amm != "nc") %>% + left_join(correctif_coef, by = c("code_postal_acheteur" = "cp", + "amm" = "AMM")) %>% + mutate(quantite_substance = ifelse(!is.na(cp_new) & !is.na(coef_pond), + quantite_substance*coef_pond, + quantite_substance), + code_postal_acheteur = ifelse(!is.na(cp_new), + cp_new, + code_postal_acheteur)) %>% + left_join(bnvd_amm_cor %>% + select(code_postal_acheteur, amm, conditionnement) %>% + distinct(), + by = c("code_postal_acheteur", "amm")) %>% + select(-coef_pond, -cp_new) + + + dbWriteTable(con_atelier, name = c(bdd_target, "bnvd_sa_cor"), + value = bnvd_sa_cor, row.names = F, overwrite = T) + + paste0("COMMENT ON TABLE ", bdd_target, ".bnvd_sa_cor IS 'creation : ", + date(), " par ", username ," via le script ", selfName , "';") %>% + dbSendQuery(con_atelier, .) + + dbSendQuery(con_atelier, paste0('CREATE INDEX cp_amm_sa_cor_idx ON ', + bdd_target, + '.bnvd_sa_cor (code_postal_acheteur, amm);')) + + +} # fin boucle annee + +dbDisconnect(con_atelier) + diff --git a/src/05.Precalcul_com_adm.py b/src/05.Precalcul_com_adm.py new file mode 100644 index 0000000..e5e0e39 --- /dev/null +++ b/src/05.Precalcul_com_adm.py @@ -0,0 +1,164 @@ +import configparser + +config = configparser.ConfigParser() +config.read("/config.ini") + +usern = config['credentials']['username'] +pw = config['credentials']['password'] +dbase = "" +dbase_rpg = "" + +import datetime +import psycopg2 +# Connect to an existing database + +conn = psycopg2.connect( + host="localhost", + database=dbase, + user=usern, + password=pw) + +# Open a cursor to perform database operations +cur = conn.cursor() + +annees = range(2015, 2021, 1) + +for a in annees: + + bdd_target = "obnvds_" + str(a) + print(str(a)) + + cur.execute(""" + SET ROLE gr_db_atelier_ecophyto; + """) + + cur.execute(""" + SELECT EXISTS ( + SELECT FROM + pg_tables + WHERE + schemaname = '""" + bdd_target + """' AND + tablename = 'com_adm_occsol' + ); + """) + + table_ex = cur.fetchall() + + if (table_ex[0][0] == False): + cur.execute(""" + CREATE TABLE """ + bdd_target + """.com_adm_occsol AS ( + WITH regs AS ( + SELECT DISTINCT code_region region, + code_commune_insee com_adm + FROM donnees_references.com_dep_reg + ) + SELECT com_adm, region, cp_adm, occsol, SUM(surf_ha) surf_ha + FROM """ + bdd_target + """.parcelles + INNER JOIN regs USING(com_adm) + GROUP BY com_adm, region, cp_adm, occsol + ); + """) + + now = datetime.datetime.now() + + cur.execute(""" + COMMENT ON TABLE """ + bdd_target + """.com_adm_occsol IS + 'Schema created on """ + now.strftime("%d/%m/%Y, %H:%M:%S") + """ by """ + usern + """'; + """) + + cur.execute(""" + CREATE INDEX cp_occs_adms ON """ + bdd_target + """.com_adm_occsol (region, cp_adm, occsol); + """) + + + cur.execute(""" + SELECT EXISTS ( + SELECT FROM + pg_tables + WHERE + schemaname = '""" + bdd_target + """' AND + tablename = 'com_adm_occsol_qsa' + ); + """) + + table_ex = cur.fetchall() + + if (table_ex[0][0] == True): + + cur.execute(""" + DROP TABLE """ + bdd_target + """.com_adm_occsol_qsa; + """) + + + cur.execute("""CREATE TABLE """ + bdd_target + """.com_adm_occsol_qsa AS + SELECT cpoccs.com_adm, cpoccs.cp_adm, cpoccs.occsol, + bnvd.substance, cas, classification, classification_mention, + code_sandre_substance, fonction_substance, + SUM(cpoccs.surf_ha * quantite_substance * coef) qsa + FROM """ + bdd_target + """.com_adm_occsol cpoccs + INNER JOIN """ + bdd_target + """.coef_amm_cp coefs USING(region, cp_adm, occsol) + INNER JOIN """ + bdd_target + """.bnvd_sa_cor bnvd + ON (cpoccs.cp_adm = bnvd.code_postal_acheteur + AND coefs.amm = bnvd.amm + AND coefs.unite = bnvd.conditionnement) + GROUP BY cpoccs.com_adm, cpoccs.cp_adm, cpoccs.occsol, bnvd.substance, cas, + classification, classification_mention, + code_sandre_substance, fonction_substance;""") + + now = datetime.datetime.now() + + cur.execute(""" + COMMENT ON TABLE """ + bdd_target + """.com_adm_occsol_qsa IS + 'Table created on """ + now.strftime("%d/%m/%Y, %H:%M:%S") + """ by """ + usern + """. + Precalcul at the commune level'; + """) + + cur.execute("""CREATE INDEX com_adm_qsax ON """ + bdd_target + """.com_adm_occsol_qsa (com_adm);""") + + cur.execute(""" + SELECT EXISTS ( + SELECT FROM + pg_tables + WHERE + schemaname = '""" + bdd_target + """' AND + tablename = 'com_adm_occsol_amm' + ); + """) + + table_ex = cur.fetchall() + + if (table_ex[0][0] == True): + + cur.execute(""" + DROP TABLE """ + bdd_target + """.com_adm_occsol_amm; + """) + + + cur.execute("""CREATE TABLE """ + bdd_target + """.com_adm_occsol_amm AS + SELECT cpoccs.com_adm, cpoccs.cp_adm, cpoccs.occsol, + bnvd.amm, conditionnement, eaj, + SUM(cpoccs.surf_ha * quantite_produit * coef) quantite_produit + FROM """ + bdd_target + """.com_adm_occsol cpoccs + INNER JOIN """ + bdd_target + """.coef_amm_cp coefs USING(region, cp_adm, occsol) + INNER JOIN """ + bdd_target + """.bnvd_amm_cor bnvd + ON (cpoccs.cp_adm = bnvd.code_postal_acheteur + AND coefs.amm = bnvd.amm + AND coefs.unite = bnvd.conditionnement) + GROUP BY cpoccs.com_adm, cpoccs.cp_adm, cpoccs.occsol, bnvd.amm, conditionnement, eaj;""") + + now = datetime.datetime.now() + + cur.execute(""" + COMMENT ON TABLE """ + bdd_target + """.com_adm_occsol_amm IS + 'Table created on """ + now.strftime("%d/%m/%Y, %H:%M:%S") + """ by """ + usern + """. + Precalcul at the commune level'; + """) + + cur.execute("""CREATE INDEX com_adm_ammx ON """ + bdd_target + """.com_adm_occsol_amm (com_adm);""") + + conn.commit() + + +#Close communication with the database +cur.close() +conn.close() -- GitLab