Commit b8122968 authored by Jean-Baptiste Nizet's avatar Jean-Baptiste Nizet Committed by Exbrayat Cédric
Browse files

feat: aggregate and filter on null/empty values

fix #3
parent 7e258a76
......@@ -19,6 +19,8 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.TermsQueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
......@@ -101,7 +103,7 @@ public class GeneticResourceDaoImpl implements GeneticResourceDaoCustom {
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-post-filter.html
BoolQueryBuilder refinementQuery = boolQuery();
for (RareAggregation term : refinements.getTerms()) {
refinementQuery.must(termsQuery(term.getField(), refinements.getRefinementsForTerm(term)));
refinementQuery.must(createRefinementQuery(refinements, term));
}
// this allows avoiding to get back the suggestions field in the found documents, since we don't care
......@@ -118,6 +120,7 @@ public class GeneticResourceDaoImpl implements GeneticResourceDaoCustom {
Stream.of(RareAggregation.values()).forEach(rareAggregation ->
builder.addAggregation(AggregationBuilders.terms(rareAggregation.getName())
.field(rareAggregation.getField())
.missing(GeneticResource.NULL_VALUE)
.size(rareAggregation.getType().getMaxBuckets())));
}
......@@ -128,6 +131,46 @@ public class GeneticResourceDaoImpl implements GeneticResourceDaoCustom {
return elasticsearchTemplate.queryForPage(builder.build(), GeneticResource.class, geneticResourceHighlightMapper);
}
/**
* Creates a refinement query for the given term of the given refinements.
* Here are the various cases:
* <ul>
* <li>
* The field can't possibly be null (like the name, for example). In that case, all the acceptable values
* for the term are actual names, and all we need is a terms query containing all the accepted values
* </li>
* <li>
* The field is of type string and can be null. In that case, the null value is indexed, thanks to the
* mapping, as {@link GeneticResource#NULL_VALUE}. This NULL value can be considered as any other real value
* and all we need is thus a terms query containing all the accepted values, including NULL
* </li>
* <li>
* The field is an array, and can be an empty array. In that case, it's considered by ElasticSearch as
* missing, but the aggregation created in {@link #search(String, boolean, boolean, SearchRefinements, Pageable)}
* puts missing values in the bucket with the key {@link GeneticResource#NULL_VALUE}. So, the aggregation
* considers null values and missing values as the same value: {@link GeneticResource#NULL_VALUE}.
* It's still considered, when searching, as a missing value though. So, if {@link GeneticResource#NULL_VALUE}
* is present in the set of accepted values, we want both the null values and the missing values. We thus
* create an <code>or</code> query (i.e. a <code>should</code> bool query in Elasticsearch terms), which
* combines a terms query containing all the accepted values, including NULL, and a "not exists" query.
* This combined query thus returns all the documents that have one of the real accepted values, or is null
* and thus has the value {@link GeneticResource#NULL_VALUE} in the index, or does not exist.
* </li>
* </ul>
*/
private QueryBuilder createRefinementQuery(SearchRefinements refinements, RareAggregation term) {
Set<String> acceptedValues = refinements.getRefinementsForTerm(term);
TermsQueryBuilder termsQuery = termsQuery(term.getField(), acceptedValues);
if (acceptedValues.contains(GeneticResource.NULL_VALUE)) {
return boolQuery()
.should(termsQuery)
.should(boolQuery().mustNot(existsQuery(term.getField())));
}
else {
return termsQuery;
}
}
@Override
public List<String> suggest(String term) {
SuggestBuilder suggestion =
......
......@@ -22,6 +22,14 @@ import org.springframework.data.elasticsearch.annotations.Mapping;
)
@Mapping(mappingPath = "fr/inra/urgi/rare/domain/GeneticResource.mapping.json")
public class GeneticResource {
/**
* The value used to index null values, in order to be able to create a refinement query for this value.
* Note that this value is only used when a field has the value `null`. If a field is an empty array, it's
* considered by ElasticSearch as missing.
*/
public static final String NULL_VALUE = "NULL";
@Id
@JsonProperty("identifier")
private final String id;
......
......@@ -106,7 +106,8 @@
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
"ignore_above": 256,
"null_value": "NULL"
}
}
},
......@@ -115,7 +116,8 @@
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
"ignore_above": 256,
"null_value": "NULL"
}
}
},
......@@ -124,7 +126,8 @@
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
"ignore_above": 256,
"null_value": "NULL"
}
}
},
......@@ -139,7 +142,8 @@
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
"ignore_above": 256,
"null_value": "NULL"
}
}
},
......
......@@ -383,6 +383,71 @@ class GeneticResourceDaoTest {
assertThat(taxon.getBuckets()).extracting(Bucket::getDocCount).containsOnly(1L);
}
@Test
void shouldAggregateNullCountryOfOriginValueAsNullValue() {
GeneticResource geneticResource1 = GeneticResource.builder()
.withId("r1")
.withName("vitis 1")
.withCountryOfOrigin("France")
.build();
GeneticResource geneticResource2 = GeneticResource.builder()
.withId("r2")
.withName("vitis 2")
.withCountryOfOrigin(null)
.build();
geneticResourceDao.saveAll(Arrays.asList(new IndexedGeneticResource(geneticResource1),
new IndexedGeneticResource(geneticResource2)));
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("vitis", true, false, SearchRefinements.EMPTY, firstPage);
assertThat(result.getContent()).hasSize(2);
Terms countryOfOrigin = result.getAggregations().get(RareAggregation.COUNTRY_OF_ORIGIN.getName());
assertThat(countryOfOrigin.getName()).isEqualTo(RareAggregation.COUNTRY_OF_ORIGIN.getName());
assertThat(countryOfOrigin.getBuckets()).extracting(Bucket::getKeyAsString).containsOnly("France", GeneticResource.NULL_VALUE);
assertThat(countryOfOrigin.getBuckets()).extracting(Bucket::getDocCount).containsOnly(1L);
}
@Test
void shouldAggregateEmptyMaterialTypeAsNullValue() {
shouldAggregateEmptyArrayAsNullValue(RareAggregation.MATERIAL,
(builder, value) -> builder.withMaterialType(value));
}
@Test
void shouldAggregateEmptyBiotopeTypeAsNullValue() {
shouldAggregateEmptyArrayAsNullValue(RareAggregation.BIOTOPE,
(builder, value) -> builder.withBiotopeType(value));
}
private void shouldAggregateEmptyArrayAsNullValue(RareAggregation rareAggregation,
BiConsumer<GeneticResource.Builder, List<String>> initializer) {
GeneticResource.Builder resource1Builder = GeneticResource.builder()
.withId("r1")
.withName("vitis 1");
initializer.accept(resource1Builder, Collections.singletonList("foo"));
GeneticResource geneticResource1 = resource1Builder.build();
GeneticResource geneticResource2 = GeneticResource.builder()
.withId("r2")
.withName("vitis 2")
.build();
geneticResourceDao.saveAll(Arrays.asList(new IndexedGeneticResource(geneticResource1),
new IndexedGeneticResource(geneticResource2)));
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("vitis", true, false, SearchRefinements.EMPTY, firstPage);
assertThat(result.getContent()).hasSize(2);
Terms material = result.getAggregations().get(rareAggregation.getName());
assertThat(material.getName()).isEqualTo(rareAggregation.getName());
assertThat(material.getBuckets()).extracting(Bucket::getKeyAsString).containsOnly("foo", GeneticResource.NULL_VALUE);
assertThat(material.getBuckets()).extracting(Bucket::getDocCount).containsOnly(1L);
}
@Test
void shouldFindPillars() {
GeneticResource resource1 = GeneticResource.builder()
......@@ -514,12 +579,23 @@ class GeneticResourceDaoTest {
.withDomain("Fungi")
.withBiotopeType(Arrays.asList("Biotope"))
.withMaterialType(Arrays.asList("DNA"))
.withCountryOfOrigin(null)
.withDescription("hello world")
.build();
GeneticResource geneticResource3 = GeneticResource.builder()
.withId("r3")
.withName("ding")
.withDomain("Plantae")
.withBiotopeType(Collections.emptyList())
.withMaterialType(Collections.emptyList())
.withCountryOfOrigin("France")
.withDescription("hello world")
.build();
geneticResourceDao.saveAll(Arrays.asList(new IndexedGeneticResource(geneticResource1),
new IndexedGeneticResource(geneticResource2)));
new IndexedGeneticResource(geneticResource2),
new IndexedGeneticResource(geneticResource3)));
}
@Test
......@@ -532,7 +608,7 @@ class GeneticResourceDaoTest {
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("hello", false, false, refinements, firstPage);
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r1");
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r1", "r3");
refinements = SearchRefinements.builder()
.withTerm(RareAggregation.DOMAIN, Arrays.asList("unexisting", "Fungi"))
......@@ -549,6 +625,45 @@ class GeneticResourceDaoTest {
assertThat(result.getContent()).extracting(GeneticResource::getId).isEmpty();
}
@Test
void shouldApplyRefinementOnNullValue() {
SearchRefinements refinements =
SearchRefinements.builder()
.withTerm(RareAggregation.COUNTRY_OF_ORIGIN, Arrays.asList(GeneticResource.NULL_VALUE))
.build();
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("hello", false, false, refinements, firstPage);
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r2");
}
@Test
void shouldApplyRefinementOnEmptyBiotopeType() {
SearchRefinements refinements =
SearchRefinements.builder()
.withTerm(RareAggregation.BIOTOPE, Arrays.asList(GeneticResource.NULL_VALUE))
.build();
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("hello", false, false, refinements, firstPage);
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r3");
}
@Test
void shouldApplyRefinementOnEmptyMaterialType() {
SearchRefinements refinements =
SearchRefinements.builder()
.withTerm(RareAggregation.MATERIAL, Arrays.asList(GeneticResource.NULL_VALUE))
.build();
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("hello", false, false, refinements, firstPage);
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r3");
}
@Test
void shouldApplyRefinementsOnMultipleTermsWithAnd() {
SearchRefinements refinements =
......@@ -583,14 +698,14 @@ class GeneticResourceDaoTest {
AggregatedPage<GeneticResource> result =
geneticResourceDao.search("hello", true, false, refinements, firstPage);
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r1");
assertThat(result.getContent()).extracting(GeneticResource::getId).containsOnly("r1", "r3");
// aggregations are computed based on the result of the full-text-query, not based on the result
// of the refinements
Terms domain = result.getAggregations().get(RareAggregation.DOMAIN.getName());
assertThat(domain.getBuckets()).hasSize(2);
assertThat(domain.getBuckets()).extracting(Bucket::getKeyAsString).containsOnly("Plantae", "Fungi");
assertThat(domain.getBuckets()).extracting(Bucket::getDocCount).containsOnly(1L);
assertThat(domain.getBuckets()).extracting(Bucket::getDocCount).containsOnly(2L, 1L);
result = geneticResourceDao.search("Human", true, false, refinements, firstPage);
......
......@@ -5,7 +5,7 @@ import { NgbTooltipModule, NgbTypeaheadModule } from '@ng-bootstrap/ng-bootstrap
import { ComponentTester } from 'ngx-speculoos';
import { AggregationsComponent } from './aggregations.component';
import { SmallAggregationComponent } from '../aggregation/small-aggregation.component';
import { SmallAggregationComponent } from '../small-aggregation/small-aggregation.component';
import { LargeAggregationComponent } from '../large-aggregation/large-aggregation.component';
import { toAggregation } from '../models/test-model-generators';
import { AggregationCriterion } from '../models/aggregation-criterion';
......
......@@ -15,7 +15,7 @@ import { SearchComponent } from './search/search.component';
import { GeneticResourcesComponent } from './genetic-resources/genetic-resources.component';
import { GeneticResourceComponent } from './genetic-resource/genetic-resource.component';
import { AggregationsComponent } from './aggregations/aggregations.component';
import { SmallAggregationComponent } from './aggregation/small-aggregation.component';
import { SmallAggregationComponent } from './small-aggregation/small-aggregation.component';
import { LargeAggregationComponent } from './large-aggregation/large-aggregation.component';
import { PillarsComponent } from './pillars/pillars.component';
import { AggregationNamePipe } from './aggregation-name.pipe';
......
<ng-template #resultTemplate let-bucket="result" let-t="term">
<ng-container *ngIf="bucket !== 'REFINE'; else refine">
<ngb-highlight [result]="bucket.key" [term]="t"></ngb-highlight>
<ngb-highlight [result]="displayableKey(bucket.key)" [term]="t"></ngb-highlight>
<small class="ml-1 text-muted">[{{ bucket.documentCount | number }}]</small>
</ng-container>
<ng-template #refine>
......@@ -27,7 +27,7 @@
<div class="mb-2">
<span class="badge badge-pill badge-secondary mr-1" *ngFor="let key of selectedKeys" tabindex="0"
(keydown.backspace)="removeKey(key)">
<rare-document-count [name]="key" [count]="documentCountForKey(key)" [muted]="false"></rare-document-count>
<rare-document-count [name]="displayableKey(key)" [count]="documentCountForKey(key)" [muted]="false"></rare-document-count>
<button tabindex="-1" type="button" class="btn btn-link" (click)="removeKey(key)">&times;</button>
</span>
</div>
......
......@@ -11,10 +11,11 @@ import { AggregationCriterion } from '../models/aggregation-criterion';
import { AggregationNamePipe } from '../aggregation-name.pipe';
import { DocumentCountComponent } from '../document-count/document-count.component';
import { Bucket } from '../models/page';
import { NULL_VALUE } from '../models/genetic-resource.model';
describe('LargeAggregationComponent', () => {
const aggregation = toAggregation('coo', ['France', 'Italy', 'New Zealand']);
const aggregation = toAggregation('coo', ['France', 'Italy', 'New Zealand', NULL_VALUE]);
class LargeAggregationComponentTester extends ComponentTester<LargeAggregationComponent> {
constructor() {
......@@ -65,7 +66,7 @@ describe('LargeAggregationComponent', () => {
tester.detectChanges();
// then it should display a title and the number of possible keys
expect(tester.title).toHaveText('Pays d\'origine (3)');
expect(tester.title).toHaveText('Pays d\'origine (4)');
// and the buckets with their name and count in a typeahead
expect(tester.inputField).not.toBeNull();
expect(tester.typeahead).not.toBeNull();
......@@ -86,7 +87,7 @@ describe('LargeAggregationComponent', () => {
it('should display the selected criteria as pills', () => {
// given an aggregation with a bucket and a selected value
const selectedKeys = ['France', 'Italy'];
const selectedKeys = ['France', 'Italy', NULL_VALUE];
const tester = new LargeAggregationComponentTester();
const component = tester.componentInstance;
......@@ -97,11 +98,13 @@ describe('LargeAggregationComponent', () => {
tester.detectChanges();
// then it should have several removable pills
expect(tester.pills.length).toBe(2);
expect(tester.pills.length).toBe(3);
expect(tester.pills[0]).toContainText('France[10]');
expect(tester.pills[0].button('button')).not.toBeNull();
expect(tester.pills[1]).toContainText('Italy[20]');
expect(tester.pills[1].button('button')).not.toBeNull();
expect(tester.pills[2]).toContainText('Aucun[40]');
expect(tester.pills[2].button('button')).not.toBeNull();
});
it('should find one results containing the term entered', () => {
......@@ -119,6 +122,21 @@ describe('LargeAggregationComponent', () => {
expect((actualResults[0] as Bucket).key).toBe('France');
});
it('should find one results containing the term entered when it is the null value translation', () => {
// given an aggregation with a bucket
const component = new LargeAggregationComponent();
component.aggregation = aggregation;
// when searching for a result
let actualResults: Array<BucketOrRefine> = [];
component.search(of('auc'))
.subscribe(results => actualResults = results);
// then it should have no match
expect(actualResults.length).toBe(1);
expect((actualResults[0] as Bucket).key).toBe(NULL_VALUE);
});
it('should find the results containing the term entered and ignore the case', () => {
// given an aggregation with a bucket
const component = new LargeAggregationComponent();
......@@ -130,10 +148,11 @@ describe('LargeAggregationComponent', () => {
.subscribe(results => actualResults = results);
// then it should have one match
expect(actualResults.length).toBe(3);
expect(actualResults.length).toBe(4);
expect((actualResults[0] as Bucket).key).toBe('France');
expect((actualResults[1] as Bucket).key).toBe('Italy');
expect((actualResults[2] as Bucket).key).toBe('New Zealand');
expect((actualResults[3] as Bucket).key).toBe(NULL_VALUE);
});
it('should not find the results containing the term entered if it is already selected', () => {
......
......@@ -3,6 +3,7 @@ import { FormControl } from '@angular/forms';
import { Observable } from 'rxjs';
import { debounceTime, distinctUntilChanged, map } from 'rxjs/operators';
import { NgbTypeaheadSelectItemEvent } from '@ng-bootstrap/ng-bootstrap';
import { NULL_VALUE } from '../models/genetic-resource.model';
import { Aggregation, Bucket } from '../models/page';
import { AggregationCriterion } from '../models/aggregation-criterion';
......@@ -37,7 +38,7 @@ export class LargeAggregationComponent {
// returns values not already selected
.filter(bucket => !this.selectedKeys.includes(bucket.key)
// and that contains the term, ignoring the case
&& bucket.key.toLowerCase().includes(term.toLowerCase()));
&& this.displayableKey(bucket.key).toLowerCase().includes(term.toLowerCase()));
// return the first N results
const result: Array<BucketOrRefine> = allMatchingBuckets.slice(0, maxResultsDisplayed);
......@@ -83,4 +84,8 @@ export class LargeAggregationComponent {
documentCountForKey(key: string) {
return this.aggregation.buckets.find(bucket => bucket.key === key).documentCount;
}
displayableKey(key: string): string {
return key === NULL_VALUE ? 'Aucun' : key;
}
}
export const NULL_VALUE = 'NULL';
export interface GeneticResourceModel {
identifier: string;
name: string;
......
......@@ -16,7 +16,7 @@ import { SearchService } from '../search.service';
import { GeneticResourceModel } from '../models/genetic-resource.model';
import { toAggregation, toGeneticResource, toSecondPage, toSinglePage } from '../models/test-model-generators';
import { AggregationsComponent } from '../aggregations/aggregations.component';
import { SmallAggregationComponent } from '../aggregation/small-aggregation.component';
import { SmallAggregationComponent } from '../small-aggregation/small-aggregation.component';
import { LargeAggregationComponent } from '../large-aggregation/large-aggregation.component';
import { AggregationNamePipe } from '../aggregation-name.pipe';
import { DocumentCountComponent } from '../document-count/document-count.component';
......
......@@ -12,7 +12,7 @@
[id]="aggregation.name + bucket.key"
[formControlName]="bucket.key">
<label class="form-check-label" [for]="aggregation.name + bucket.key">
<rare-document-count [name]="bucket.key" [count]="bucket.documentCount"></rare-document-count>
<rare-document-count [name]="displayableKey(bucket.key)" [count]="bucket.documentCount"></rare-document-count>
</label>
</div>
</div>
......
......@@ -8,10 +8,11 @@ import { toAggregation } from '../models/test-model-generators';
import { AggregationCriterion } from '../models/aggregation-criterion';
import { AggregationNamePipe } from '../aggregation-name.pipe';
import { DocumentCountComponent } from '../document-count/document-count.component';
import { NULL_VALUE } from '../models/genetic-resource.model';
describe('SmallAggregationComponent', () => {
const aggregation = toAggregation('coo', ['France', 'Italy', 'New Zealand']);
const aggregation = toAggregation('coo', ['France', 'Italy', 'New Zealand', NULL_VALUE]);
class SmallAggregationComponentTester extends ComponentTester<SmallAggregationComponent> {
constructor() {
......@@ -51,13 +52,15 @@ describe('SmallAggregationComponent', () => {
// then it should display a title
expect(tester.title).toHaveText('Pays d\'origine');
// and the buckets with their name and count
expect(tester.labels.length).toBe(3);
expect(tester.labels.length).toBe(4);
expect(tester.labels[0]).toContainText('France');
expect(tester.labels[0]).toContainText('[10]');
expect(tester.labels[1]).toContainText('Italy');
expect(tester.labels[1]).toContainText('[20]');
expect(tester.labels[2]).toContainText('New Zealand');
expect(tester.labels[2]).toContainText('[30]');
expect(tester.labels[3]).toContainText('Aucun');
expect(tester.labels[3]).toContainText('[40]');
});
it('should not display an aggregation with empty buckets', () => {
......@@ -75,13 +78,20 @@ describe('SmallAggregationComponent', () => {
it('should extract keys from selected values', () => {
// given a few selected values among a bucket
const values: { [key: string]: boolean | null } = { 'France': true, 'England': false, 'Italy': true, 'New Zealand': null };
const values: { [key: string]: boolean | null } =
{
'France': true,
'England': false,
'Italy': true,
'New Zealand': null,
[NULL_VALUE]: true
};
// when extracting keys
const keys = SmallAggregationComponent.extractKeys(values);
// then it should return only the truthy ones
expect(keys).toEqual(['France', 'Italy']);
expect(keys).toEqual(['France', 'Italy', NULL_VALUE]);
});
it('should build a form based on the bucket', () => {
......@@ -94,7 +104,7 @@ describe('SmallAggregationComponent', () => {
// then it should have a form with several fields
const controls = component.aggregationForm.controls;
expect(Object.keys(controls)).toEqual(['France', 'Italy', 'New Zealand']);
expect(Object.keys(controls)).toEqual(['France', 'Italy', 'New Zealand', NULL_VALUE]);
});
it('should build a form and check selected criteria', () => {
......@@ -110,7 +120,7 @@ describe('SmallAggregationComponent', () => {
// then it should have a form with several fields
const controls = component.aggregationForm.controls;
expect(Object.keys(controls)).toEqual(['France', 'Italy', 'New Zealand']);
expect(Object.keys(controls)).toEqual(['France', 'Italy', 'New Zealand', NULL_VALUE]);
// and France should be checked
expect(component.aggregationForm.get('France').value).toBeTruthy();
});
......
......@@ -3,6 +3,7 @@ import { FormControl, FormGroup } from '@angular/forms';
import { Aggregation } from '../models/page';
import { AggregationCriterion } from '../models/aggregation-criterion';
import { NULL_VALUE } from '../models/genetic-resource.model';
@Component({
selector: 'rare-small-aggregation',
......@@ -55,4 +56,8 @@ export class SmallAggregationComponent implements OnInit {
this.aggregationChange.emit(event);
});
}
displayableKey(key: string): string {
return key === NULL_VALUE ? 'Aucun' : key;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment