Skip to content
Snippets Groups Projects
Verified commit 2190f7ac, authored by Célia Michotey and committed by Raphaël Flores
Browse files

Finish indexation script and update mappings.

parent 5dad7baf
No related branches found
No related tags found
1 merge request: !41 "Create script to index data in FAIDARE."
Showing
with 664 additions and 264 deletions
......@@ -91,6 +91,9 @@ local.properties
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
*.iml
*.ipr
# User-specific stuff
.idea/*
.idea/**/workspace.xml
......
{
"datadiscovery": {
"dynamic": "strict",
"dynamic": "false",
"_source": {
"includes": ["@id", "@type", "schema:*", "groupId"]
},
......@@ -30,6 +30,19 @@
"type": "keyword"
},
"germplasmURI": {
"type": "keyword"
},
"germplasmDbId": {
"type": "keyword"
},
"studyURIs": {
"type": "keyword"
},
"studyDbIds": {
"type": "keyword"
},
"germplasm": {
"type": "object",
"properties": {
......
......@@ -2,38 +2,46 @@
"germplasmAttribute": {
"dynamic": "strict",
"properties": {
"germplasmAttributeURI": {
"type": "keyword"
},
"germplasmAttributeDbId": {
"type": "keyword"
},
"germplasmURI": {
"type": "keyword"
},
"germplasmDbId": {
"type": "keyword"
},
"data": {
"type": "nested",
"properties": {
"attributeDbId": {
"attributeURI": {
"type": "keyword"
},
"attributeName": {
"attributeDbId": {
"type": "keyword"
},
"attributeCode": {
"type": "keyword"
},
"value": {
"attributeName": {
"type": "keyword"
},
"determinedDate": {
"type": "date",
"format": "YYYY-MM-dd"
},
"value": {
"type": "keyword"
}
}
},
"documentationURL": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......@@ -57,4 +65,4 @@
}
}
}
}
\ No newline at end of file
}
......@@ -2,13 +2,19 @@
"germplasmPedigree": {
"dynamic": "strict",
"properties": {
"germplasmDbId": {
"germplasmPedigreeURI": {
"type": "keyword"
},
"defaultDisplayName": {
"germplasmPedigreeDbId": {
"type": "keyword"
},
"pedigree": {
"germplasmURI": {
"type": "keyword"
},
"germplasmDbId": {
"type": "keyword"
},
"defaultDisplayName": {
"type": "keyword"
},
"crossingPlan": {
......@@ -20,6 +26,9 @@
"familyCode": {
"type": "keyword"
},
"parent1URI": {
"type": "keyword"
},
"parent1DbId": {
"type": "keyword"
},
......@@ -29,6 +38,9 @@
"parent1Type": {
"type": "keyword"
},
"parent2URI": {
"type": "keyword"
},
"parent2DbId": {
"type": "keyword"
},
......@@ -38,8 +50,14 @@
"parent2Type": {
"type": "keyword"
},
"pedigree": {
"type": "keyword"
},
"siblings": {
"properties": {
"germplasmURI": {
"type": "keyword"
},
"germplasmDbId": {
"type": "keyword"
},
......@@ -49,13 +67,9 @@
}
},
"documentationURL": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,6 +2,15 @@
"germplasmProgeny": {
"dynamic": "strict",
"properties": {
"germplasmProgenyURI": {
"type": "keyword"
},
"germplasmProgenyDbId": {
"type": "keyword"
},
"germplasmURI": {
"type": "keyword"
},
"germplasmDbId": {
"type": "keyword"
},
......@@ -10,6 +19,9 @@
},
"progeny": {
"properties": {
"germplasmURI": {
"type": "keyword"
},
"germplasmDbId": {
"type": "keyword"
},
......@@ -22,13 +34,9 @@
}
},
"documentationURL": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,76 +2,43 @@
"germplasm": {
"dynamic": "strict",
"properties": {
"germplasmDbId": {
"germplasmURI": {
"type": "keyword"
},
"defaultDisplayName": {
"germplasmDbId": {
"type": "keyword"
},
"accessionNumber": {
"type": "keyword",
"doc_values": true
},
"germplasmName": {
"type": "keyword",
"doc_values": true
},
"germplasmPUI": {
"type": "keyword"
},
"pedigree": {
"type": "keyword"
},
"seedSource": {
"type": "keyword"
},
"synonyms": {
"germplasmName": {
"type": "keyword",
"doc_values": true
},
"commonCropName": {
"accessionNumber": {
"type": "keyword",
"doc_values": true
},
"instituteCode": {
"type": "keyword"
},
"instituteName": {
"acquisitionDate": {
"type": "keyword"
},
"biologicalStatusOfAccessionCode": {
"type": "keyword"
},
"countryOfOriginCode": {
"breedingMethodDbId": {
"type": "keyword"
},
"typeOfGermplasmStorageCode": {
"type": "keyword"
},
"taxonIds": {
"properties": {
"sourceName": {
"type": "keyword"
},
"taxonId": {
"type": "keyword"
}
}
},
"genus": {
"commonCropName": {
"type": "keyword",
"doc_values": true
},
"species": {
"type": "keyword"
},
"speciesAuthority": {
"countryOfOriginCode": {
"type": "keyword"
},
"subtaxa": {
"defaultDisplayName": {
"type": "keyword"
},
"subtaxaAuthority": {
"documentationURL": {
"type": "keyword"
},
"donors": {
......@@ -82,6 +49,9 @@
"donorGermplasmPUI": {
"type": "keyword"
},
"germplasmPUI": {
"type": "keyword"
},
"donorAccessionNumber": {
"type": "keyword"
},
......@@ -118,17 +88,70 @@
}
}
},
"acquisitionDate": {
"genus": {
"type": "keyword",
"doc_values": true
},
"germplasmGenus": {
"type": "keyword",
"doc_values": true
},
"species": {
"type": "keyword"
},
"germplasmSpecies": {
"type": "keyword",
"doc_values": true
},
"genusSpecies": {
"type": "keyword",
"doc_values": true
},
"speciesAuthority": {
"type": "keyword"
},
"subtaxa": {
"type": "keyword"
},
"subTaxa": {
"type": "keyword"
},
"genusSpeciesSubtaxa": {
"type": "keyword",
"doc_values": true
},
"subtaxaAuthority": {
"type": "keyword"
},
"instituteCode": {
"type": "keyword"
},
"instituteName": {
"type": "keyword"
},
"pedigree": {
"type": "keyword"
},
"seedSource": {
"type": "keyword"
},
"SeedSource": {
"type": "keyword"
},
"synonyms": {
"type": "keyword",
"doc_values": true
},
"taxonIds": {
"properties": {
"sourceName": {
"type": "keyword"
},
"taxonId": {
"type": "keyword"
}
}
},
"taxonSynonyms": {
"type": "keyword",
"doc_values": true
......@@ -140,6 +163,9 @@
"taxonComment": {
"type": "keyword"
},
"typeOfGermplasmStorageCode": {
"type": "keyword"
},
"geneticNature": {
"type": "keyword"
},
......@@ -531,17 +557,22 @@
}
}
},
"studyURIs": {
"type": "keyword"
},
"studyDbIds": {
"type": "keyword"
},
"documentationURL": {
"studyURI": {
"type": "keyword"
},
"source": {
"studyDbId": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,6 +2,9 @@
"location": {
"dynamic": "strict",
"properties": {
"locationURI": {
"type": "keyword"
},
"locationDbId": {
"type": "keyword"
},
......@@ -11,10 +14,13 @@
"name": {
"type": "keyword"
},
"locationType": {
"abbreviation": {
"type": "keyword"
},
"abbreviation": {
"abreviation": {
"type": "keyword"
},
"locationType": {
"type": "keyword"
},
"countryCode": {
......@@ -23,19 +29,25 @@
"countryName": {
"type": "keyword"
},
"institutionName": {
"documentationURL": {
"type": "keyword"
},
"instituteName": {
"type": "keyword"
},
"institutionAdress": {
"instituteAddress": {
"type": "keyword"
},
"latitude": {
"instituteAdress": {
"type": "keyword"
},
"altitude": {
"type": "double"
},
"longitude": {
"latitude": {
"type": "double"
},
"altitude": {
"longitude": {
"type": "double"
},
"additionalInfo": {
......@@ -43,13 +55,16 @@
"properties": {}
},
"documentationURL": {
"studyDbIds": {
"type": "keyword"
},
"source": {
"studyURIs": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,111 +2,85 @@
"observationUnit": {
"dynamic": "strict",
"properties": {
"observationUnitURI": {
"type": "keyword"
},
"observationUnitDbId": {
"type": "keyword"
},
"observationUnitName": {
"type": "keyword"
},
"observationLevel": {
"X": {
"type": "keyword"
},
"observationLevels": {
"Y": {
"type": "keyword"
},
"observationLevelDetails": {
"properties": {
"type": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
"Xname": {
"type": "keyword"
},
"plotNumber": {
"type": "keyword",
"index": false
"Yname": {
"type": "keyword"
},
"plantNumber": {
"type": "keyword",
"index": false
"Xm": {
"type": "keyword"
},
"Ym": {
"type": "keyword"
},
"blockNumber": {
"type": "keyword",
"index": false
},
"replicate": {
"entryNumber": {
"type": "keyword",
"index": false
},
"X": {
"type": "keyword"
"entryType": {
"type": "keyword",
"index": false
},
"Y": {
"germplasmURI": {
"type": "keyword"
},
"Xname": {
"germplasmDbId": {
"type": "keyword"
},
"Yname": {
"germplasmPUI": {
"type": "keyword"
},
"Xm": {
"germplasmName": {
"type": "keyword"
},
"Ym": {
"accessionNumber": {
"type": "keyword"
},
"germplasmDbId": {
"observationLevel": {
"type": "keyword"
},
"germplasmId": {
"type": "long"
},
"germplasmPUI": {
"type": "long"
},
"germplasmName": {
"observationLevels": {
"type": "keyword"
},
"observationUnitXref": {
"observationLevelDetails": {
"properties": {
"id": {
"type": {
"type": "keyword"
},
"source": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"studyDbId": {
"type": "keyword"
},
"studyName": {
"type": "keyword"
},
"studyLocationDbId": {
"type": "keyword"
},
"studyLocation": {
"type": "keyword"
},
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"treatments": {
"observationUnitXref": {
"properties": {
"factor": {
"id": {
"type": "keyword"
},
"modality": {
"source": {
"type": "keyword"
}
}
......@@ -114,54 +88,95 @@
"observations": {
"type": "nested",
"properties": {
"observationDbId": {
"observationURI": {
"type": "keyword"
},
"observationVariableDbId": {
"observationDbId": {
"type": "keyword"
},
"observationVariableName": {
"type": "keyword"
"collector": {
"type": "keyword",
"index": false
},
"observationTimeStamp": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss'Z'"
},
"season": {
"gdd": {
"type": "float"
},
"observationVariableURI": {
"type": "keyword"
},
"collector": {
"type": "keyword",
"index": false
"observationVariableDbId": {
"type": "keyword"
},
"value": {
"observationVariableName": {
"type": "keyword"
},
"specificName": {
"type": "keyword"
},
"observationVariableId": {
"type": "long"
"season": {
"type": "keyword"
},
"value": {
"type": "keyword"
},
"isDataFile": {
"type": "boolean",
"index": false
},
"gdd": {
"type": "float"
}
}
},
"entryType": {
"plantNumber": {
"type": "keyword",
"index": false
},
"entryNumber": {
"plotNumber": {
"type": "keyword",
"index": false
},
"studyId": {
"type": "long"
"programURI": {
"type": "keyword"
},
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"replicate": {
"type": "keyword",
"index": false
},
"studyURI": {
"type": "keyword"
},
"studyDbId": {
"type": "keyword"
},
"studyName": {
"type": "keyword"
},
"studyLocationURI": {
"type": "keyword"
},
"studyLocationDbId": {
"type": "keyword"
},
"studyLocation": {
"type": "keyword"
},
"treatments": {
"properties": {
"factor": {
"type": "keyword"
},
"modality": {
"type": "keyword"
}
}
},
"trials": {
"properties": {
......@@ -173,12 +188,6 @@
}
}
},
"accessionNumber": {
"type": "keyword"
},
"taxonId": {
"type": "long"
},
"taxonScientificName": {
"type": "keyword"
},
......@@ -206,13 +215,9 @@
}
},
"documentationURL": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,48 +2,56 @@
"program": {
"dynamic": "strict",
"properties": {
"programDbId": {
"programURI": {
"type": "keyword"
},
"name": {
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"abbreviation": {
"type": "keyword"
},
"objective": {
"commonCropName": {
"type": "keyword"
},
"documentationURL": {
"type": "keyword"
},
"leadPerson": {
"type": "keyword"
},
"studyDbIds": {
"leadPersonDbId": {
"type": "keyword"
},
"trialDbIds": {
"leadPersonName": {
"type": "keyword"
},
"programURI": {
"objective": {
"type": "keyword"
},
"studyURIs": {
"trialDbIds": {
"type": "keyword"
},
"trialURIs": {
"type": "keyword"
},
"documentationURL": {
"studyDbIds": {
"type": "keyword"
},
"source": {
"studyURIs": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,6 +2,9 @@
"study": {
"dynamic": "strict",
"properties": {
"studyURI": {
"type": "keyword"
},
"studyDbId": {
"type": "keyword"
},
......@@ -11,6 +14,65 @@
"name": {
"type": "keyword"
},
"active": {
"type": "boolean"
},
"commonCropName": {
"type": "keyword"
},
"organism": {
"type": "keyword"
},
"contactURIs": {
"type": "keyword"
},
"contactDbIds": {
"type": "keyword"
},
"contacts": {
"properties": {
"contactURI": {
"type": "keyword"
},
"contactDbId": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"instituteName": {
"type": "keyword"
},
"email": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"orcid": {
"type": "keyword"
}
}
},
"dataLinks": {
"properties": {
"dataLinkName": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"url": {
"type": "keyword"
}
}
},
"documentationURL": {
"type": "keyword"
},
"startDate": {
"type": "date",
"format": "YYYY-MM-dd"
......@@ -19,13 +81,9 @@
"type": "date",
"format": "YYYY-MM-dd"
},
"active": {
"type": "boolean"
},
"studyType": {
"year": {
"type": "keyword"
},
"lastUpdate": {
"properties": {
"version": {
......@@ -37,23 +95,16 @@
}
}
},
"trialDbId": {
"type": "keyword"
},
"trialName": {
"type": "keyword"
},
"trialDbIds": {
"license": {
"type": "keyword"
},
"programDbId": {
"locationURIs": {
"type": "keyword"
},
"programName": {
"locationDbIds": {
"type": "keyword"
},
"seasons": {
"locationURI": {
"type": "keyword"
},
"locationDbId": {
......@@ -63,67 +114,131 @@
"type": "keyword"
},
"location": {
"dynamic": "true",
"properties": {
"locationURI": {
"type": "keyword"
},
"locationDbId": {
"type": "keyword"
}
}
},
"dataLinks": {
"properties": {
},
"locationName": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"type": {
"abbreviation": {
"type": "keyword"
},
"url": {
"abreviation": {
"type": "keyword"
}
}
},
"contacts": {
"properties": {
"contactDbId": {
},
"locationType": {
"type": "keyword"
},
"name": {
"countryCode": {
"type": "keyword"
},
"institutionName": {
"countryName": {
"type": "keyword"
},
"email": {
"documentationURL": {
"type": "keyword"
},
"type": {
"instituteName": {
"type": "keyword"
},
"orcid": {
"instituteAddress": {
"type": "keyword"
},
"instituteAdress": {
"type": "keyword"
},
"altitude": {
"type": "double"
},
"latitude": {
"type": "double"
},
"longitude": {
"type": "double"
},
"additionalInfo": {
"dynamic": "true",
"type": "object",
"properties": {}
}
}
},
"observationVariableDbIds": {
"programURIs": {
"type": "keyword"
},
"programDbIds": {
"type": "keyword"
},
"programURI": {
"type": "keyword"
},
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"seasons": {
"type": "keyword"
},
"studyDescription": {
"type": "keyword"
},
"studyTypeURI": {
"type": "keyword"
},
"studyTypeDbId": {
"type": "keyword"
},
"studyType": {
"type": "keyword"
},
"studyTypeName": {
"type": "keyword"
},
"trialURIs": {
"type": "keyword"
},
"trialDbIds": {
"type": "keyword"
},
"trialURI": {
"type": "keyword"
},
"trialDbId": {
"type": "keyword"
},
"trialName": {
"type": "keyword"
},
"germplasmURIs": {
"type": "keyword"
},
"germplasmDbIds": {
"type": "keyword"
},
"observationVariableURIs": {
"type": "keyword"
},
"observationVariableDbIds": {
"type": "keyword"
},
"additionalInfo": {
"dynamic": "true",
"type": "object",
"properties": {}
},
"documentationURL": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -2,21 +2,30 @@
"trial": {
"dynamic": "strict",
"properties": {
"docId": {
"trialURI": {
"type": "keyword"
},
"trialDbId": {
"type": "keyword"
},
"trialPUI": {
"trialName": {
"type": "keyword"
},
"trialName": {
"trialPUI": {
"type": "keyword"
},
"trialType": {
"type": "keyword"
},
"active": {
"type": "boolean"
},
"commonCropName": {
"type": "keyword"
},
"documentationURL": {
"type": "keyword"
},
"startDate": {
"type": "date",
"format": "YYYY-MM-dd"
......@@ -25,9 +34,179 @@
"type": "date",
"format": "YYYY-MM-dd"
},
"active": {
"type": "boolean"
"programURIs": {
"type": "keyword"
},
"programDbIds": {
"type": "keyword"
},
"programURI": {
"type": "keyword"
},
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"studyURIs": {
"type": "keyword"
},
"studyDbIds": {
"type": "keyword"
},
"studies": {
"properties": {
"studyURI": {
"type": "keyword"
},
"studyDbId": {
"type": "keyword"
},
"studyName": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"locationURI": {
"type": "keyword"
},
"locationDbId": {
"type": "keyword"
},
"locationName": {
"type": "keyword"
},
"active": {
"type": "boolean"
},
"commonCropName": {
"type": "keyword"
},
"contactURIs": {
"type": "keyword"
},
"contactDbIds": {
"type": "keyword"
},
"dataLinks": {
"properties": {
"name": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"url": {
"type": "keyword"
}
}
},
"documentationURL": {
"type": "keyword"
},
"startDate": {
"type": "date",
"format": "YYYY-MM-dd"
},
"endDate": {
"type": "date",
"format": "YYYY-MM-dd"
},
"license": {
"type": "keyword"
},
"programURIs": {
"type": "keyword"
},
"programDbIds": {
"type": "keyword"
},
"programURI": {
"type": "keyword"
},
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"seasons": {
"dynamic": "true",
"properties": {}
},
"studyType": {
"type": "keyword"
},
"studyTypeName": {
"type": "keyword"
},
"trialURIs": {
"type": "keyword"
},
"trialDbIds": {
"type": "keyword"
},
"trialURI": {
"type": "keyword"
},
"trialDbId": {
"type": "keyword"
},
"trialName": {
"type": "keyword"
},
"germplasmURIs": {
"type": "keyword"
},
"germplasmDbIds": {
"type": "keyword"
},
"observationVariableURIs": {
"type": "keyword"
},
"observationVariableDbIds": {
"type": "keyword"
},
"locationURIs": {
"type": "keyword"
},
"locationDbIds": {
"type": "keyword"
},
"additionalInfo": {
"dynamic": "true",
"type": "object",
"properties": {}
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
"@type": {
"type": "keyword"
},
"@id": {
"type": "keyword"
},
"schema:includedInDataCatalog": {
"type": "keyword"
},
"schema:identifier": {
"type": "keyword"
},
"schema:name": {
"type": "keyword"
},
"schema:url": {
"type": "keyword"
}
}
},
"datasetAuthorship": {
"properties": {
"license": {
......@@ -48,15 +227,24 @@
}
}
},
"contactURIs": {
"type": "keyword"
},
"contactDbIds": {
"type": "keyword"
},
"contacts": {
"properties": {
"contactURI": {
"type": "keyword"
},
"contactDbId": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"institutionName": {
"instituteName": {
"type": "keyword"
},
"email": {
......@@ -65,71 +253,57 @@
"type": {
"type": "keyword"
},
"contactURI": {
"orcid": {
"type": "keyword"
},
"orcid": {
"studyURIs": {
"type": "keyword"
}
}
},
"studies": {
"properties": {
"studyDbId": {
},
"studyDbIds": {
"type": "keyword"
},
"studyName": {
"trialURIs": {
"type": "keyword"
},
"name": {
"trialDbIds": {
"type": "keyword"
},
"locationDbId": {
"source": {
"type": "keyword"
},
"locationName": {
"groupId": {
"type": "long"
},
"@type": {
"type": "keyword"
},
"studyURI": {
"@id": {
"type": "keyword"
},
"locationURI": {
"schema:includedInDataCatalog": {
"type": "keyword"
},
"schema:identifier": {
"type": "keyword"
},
"schema:name": {
"type": "keyword"
},
"schema:url": {
"type": "keyword"
}
}
},
"commonCropName": {
"type": "keyword"
},
"additionalInfo": {
"dynamic": "true",
"type": "object",
"properties": {}
},
"programDbId": {
"type": "keyword"
},
"programName": {
"type": "keyword"
},
"studyDbIds": {
"type": "keyword"
},
"contactDbIds": {
"type": "keyword"
},
"trialURI": {
"type": "keyword"
},
"documentationURL": {
"type": "keyword"
},
"source": {
"type": "keyword"
},
"groupId": {
"type": "long"
},
......
......@@ -7,9 +7,10 @@ ES_PORT="9200"
ENV="dev"
DOCUMENT_TYPES="all"
ALL_DOCUMENT_TYPES="germplasm germplasm-mcpd germplasmAttribute germplasmPedigree germplasmProgeny location program study trial observationUnit datadiscovery"
ALL_DOCUMENT_TYPES="germplasm germplasmMcpd germplasmAttribute germplasmPedigree germplasmProgeny location program study trial observationUnit datadiscovery"
ALL_ENVS="dev beta staging int prod test"
BASEDIR=$(dirname "$0")
TMP_FILE="log.tmp"
RED='\033[0;31m'
GREEN='\033[0;32m'
......@@ -118,7 +119,7 @@ done
for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
echo && echo -e "${BOLD}Manage ${DOCUMENT_TYPE} documents...${NC}"
INDEX_PATTERN="faidare_${DOCUMENT_TYPE}_${ENV}"
INDEX_PATTERN=$(echo "faidare_${DOCUMENT_TYPE}_${ENV}" | sed -E "s/([a-z])([A-Z])/\1-\2/" | tr '[:upper:]' '[:lower:]')
# Create template
TEMPLATE_NAME="${INDEX_PATTERN}_template"
......@@ -139,21 +140,26 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
{
parallel --bar "
curl -s -H 'Content-Type: application/x-ndjson' -H 'Content-Encoding: gzip' -H 'Accept-Encoding: gzip' -XPOST ${ES_HOST}:${ES_PORT}/${INDEX_NAME}/_bulk --data-binary '@{}' > {.}.log.gz" \
::: $(find ${DATA_DIR} -name ${DOCUMENT_TYPE}*.json.gz)
::: $(find ${DATA_DIR} -name ${DOCUMENT_TYPE}-*.json.gz)
} || {
code=$?
echo -e "${RED}ERROR: a problem occurred when trying to index data with parallel program.${NC}"
exit $code
}
#echo -e "${RED}ERROR: a problem occurred when trying to index data into ${ES_HOST}:${ES_PORT}/${INDEX_NAME} indice.${NC}"
parallel "gunzip -c {} | jq '.errors' | grep -q true && echo -e '${ORANGE}ERROR found in {}${NC}' ;" ::: $(find ${DATA_DIR} -name ${DOCUMENT_TYPE}*.log.gz)
parallel "gunzip -c {} | jq '.errors' | grep -q true && echo -e '${ORANGE}ERROR found in {}${NC}' >> ${TMP_FILE} ;" ::: $(find ${DATA_DIR} -name ${DOCUMENT_TYPE}-*.log.gz)
if [ -f "${TMP_FILE}" ] && [ -s "${TMP_FILE}" ]; then
echo -e "${RED}ERROR: a problem occurred when trying to index data into ${ES_HOST}:${ES_PORT}/${INDEX_NAME} indice.${NC}"
echo -e "${ORANGE}$(cat ${TMP_FILE})${NC}"
rm "${TMP_FILE}"
exit 1;
fi
# Check indexed data
echo -e "* Check data indexed from ${DATA_DIR} into ${INDEX_NAME}..."
# skip some documents because they contain nested objects that distort the count
if [[ "${DOCUMENT_TYPE}" != "germplasmAttribute" && "${DOCUMENT_TYPE}" != "observationUnit" && "${DOCUMENT_TYPE}" != "datadiscovery" ]]; then
COUNT_EXTRACTED_DOCS=0
for FILE in $(find ${DATA_DIR} -name ${DOCUMENT_TYPE}*.json.gz); do
for FILE in $(find ${DATA_DIR} -name ${DOCUMENT_TYPE}-*.json.gz); do
COUNT_FILE_DOCS=$(zcat ${FILE} | grep "\"_id\"" | sort | uniq | wc -l)
COUNT_EXTRACTED_DOCS=$((COUNT_EXTRACTED_DOCS+COUNT_FILE_DOCS))
done
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment