diff --git a/conf/solr/schema.xml b/conf/solr/schema.xml index f4121de97c1..77b7cebdaf3 100644 --- a/conf/solr/schema.xml +++ b/conf/solr/schema.xml @@ -277,38 +277,38 @@ - - - + + + - - - - + + + + - - - + + + - - + + - + - - - + + + - + @@ -316,7 +316,7 @@ - + @@ -328,12 +328,12 @@ - + - + @@ -353,19 +353,19 @@ - + - + - + @@ -389,28 +389,28 @@ - - + + - + - + - - + + + - @@ -538,12 +538,12 @@ - + @@ -574,8 +574,8 @@ - + @@ -599,9 +599,9 @@ - + @@ -631,13 +631,13 @@ - - + + - + @@ -649,10 +649,10 @@ + - @@ -819,7 +819,9 @@ - + + + diff --git a/doc/release-notes/10887-solr-field-types.md b/doc/release-notes/10887-solr-field-types.md new file mode 100644 index 00000000000..2d8225172af --- /dev/null +++ b/doc/release-notes/10887-solr-field-types.md @@ -0,0 +1,82 @@ +This release enhances how numerical and date fields are indexed in Solr. Previously, all fields were indexed as English text (text_en), but with this update: + +* Integer fields are indexed as `plong` +* Float fields are indexed as `pdouble` +* Date fields are indexed as `date_range` (`solr.DateRangeField`) + +Specifically, the following fields were updated: + +- coverage.Depth +- coverage.ObjectCount +- coverage.ObjectDensity +- coverage.Redshift.MaximumValue +- coverage.Redshift.MinimumValue +- coverage.RedshiftValue +- coverage.SkyFraction +- coverage.Spectral.CentralWavelength +- coverage.Spectral.MaximumWavelength +- coverage.Spectral.MinimumWavelength +- coverage.Temporal.StartTime +- coverage.Temporal.StopTime +- dateOfCollectionEnd +- dateOfCollectionStart +- dateOfDeposit +- distributionDate +- dsDescriptionDate +- journalPubDate +- productionDate +- resolution.Redshift +- targetSampleActualSize +- timePeriodCoveredEnd +- timePeriodCoveredStart + +This change enables range queries when searching from both the UI and the API, such as `dateOfDeposit:[2000-01-01 TO 2014-12-31]` or `targetSampleActualSize:[25 TO 50]`. 
+ +Dataverse administrators must update their Solr schema.xml (manually or by rerunning `update-fields.sh`) and reindex all datasets. + +Additionally, search result highlighting is now more accurate, ensuring that only fields relevant to the query are highlighted in search results. If the query is specifically limited to certain fields, the highlighting is now limited to those fields as well. + +## Upgrade Instructions + +7\. Update the Solr schema.xml file. Start with the standard v6.5 schema.xml, then, if your installation uses any custom or experimental metadata blocks, update it to include the extra fields (step 7a). + +Stop Solr (usually `service solr stop`, depending on your Solr installation/OS; see the [Installation Guide](https://guides.dataverse.org/en/6.5/installation/prerequisites.html#solr-init-script)). + +```shell +service solr stop +``` + +Replace schema.xml + +```shell +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.5/conf/solr/schema.xml +cp schema.xml /usr/local/solr/solr-9.4.1/server/solr/collection1/conf +``` + +Start Solr (but if you use any custom metadata blocks, perform the next step, 7a, first). + +```shell +service solr start +``` + +7a\. For installations with custom or experimental metadata blocks: + +Before starting Solr, update the schema to include all the extra metadata fields that your installation uses. We do this by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed to reflect the names of the directories, if different): + +```shell + wget https://raw.githubusercontent.com/IQSS/dataverse/v6.5/conf/solr/update-fields.sh + chmod +x update-fields.sh + curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.4.1/server/solr/collection1/conf/schema.xml +``` + +Now start Solr. + +8\. 
Reindex Solr + +Below is the simplest way to reindex Solr: + +```shell +curl http://localhost:8080/api/admin/index +``` + +The API above rebuilds the existing index "in place". If you want to be absolutely sure that your index is up-to-date and consistent, you may consider wiping it clean and reindexing everything from scratch (see [the guides](https://guides.dataverse.org/en/latest/admin/solr-search-index.html)). Just note that, depending on the size of your database, a full reindex may take a while and the users will be seeing incomplete search results during that window. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java index 01785359e0e..2c385268fa5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java @@ -531,15 +531,14 @@ public String getDisplayName() { public SolrField getSolrField() { SolrField.SolrType solrType = SolrField.SolrType.TEXT_EN; if (fieldType != null) { - - /** - * @todo made more decisions based on fieldType: index as dates, - * integers, and floats so we can do range queries etc. 
- */ if (fieldType.equals(FieldType.DATE)) { solrType = SolrField.SolrType.DATE; } else if (fieldType.equals(FieldType.EMAIL)) { solrType = SolrField.SolrType.EMAIL; + } else if (fieldType.equals(FieldType.INT)) { + solrType = SolrField.SolrType.INTEGER; + } else if (fieldType.equals(FieldType.FLOAT)) { + solrType = SolrField.SolrType.FLOAT; } Boolean parentAllowsMultiplesBoolean = false; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 9b7998b0a8e..3f60a9bd1a2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -27,6 +27,8 @@ import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; @@ -44,6 +46,7 @@ import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import jakarta.annotation.PostConstruct; import jakarta.annotation.PreDestroy; @@ -1065,34 +1068,89 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set indexableValues = dsf.getValuesWithoutNaValues().stream() + .filter(s -> intPattern.matcher(s).find()) + .collect(Collectors.toList()); + solrInputDocument.addField(solrFieldSearchable, indexableValues); + if (dsfType.getSolrField().isFacetable()) { + solrInputDocument.addField(solrFieldFacetable, indexableValues); + } + } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.FLOAT)) { + // same as for integer values, we need to filter invalid float values + List indexableValues = dsf.getValuesWithoutNaValues().stream() + .filter(s -> { + try { + Double.parseDouble(s); + return true; + } catch 
(NumberFormatException e) { + return false; + } + }) + .collect(Collectors.toList()); + solrInputDocument.addField(solrFieldSearchable, indexableValues); + if (dsfType.getSolrField().isFacetable()) { + solrInputDocument.addField(solrFieldFacetable, indexableValues); + } } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) { + // Solr accepts dates in the ISO-8601 format, e.g. YYYY-MM-DDThh:mm:ssZ, YYYYY-MM-DD, YYYY-MM, YYYY + // See: https://solr.apache.org/guide/solr/latest/indexing-guide/date-formatting-math.html + // If dates have been entered in other formats, we need to skip or convert them + // TODO at the moment we are simply skipping, but converting them would offer more value for search + // For use in facets, we index only the year (YYYY) String dateAsString = ""; if (!dsf.getValues_nondisplay().isEmpty()) { - dateAsString = dsf.getValues_nondisplay().get(0); - } + dateAsString = dsf.getValues_nondisplay().get(0).trim(); + } + logger.fine("date as string: " + dateAsString); + if (dateAsString != null && !dateAsString.isEmpty()) { - SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH); - try { - /** - * @todo when bean validation is working we - * won't have to convert strings into dates - */ - logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId()); - Date dateAsDate = inputDateyyyy.parse(dateAsString); - SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy"); - String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate); - logger.fine("YYYY only: " + datasetFieldFlaggedAsDate); - // solrInputDocument.addField(solrFieldSearchable, - // Integer.parseInt(datasetFieldFlaggedAsDate)); - solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate); - if (dsfType.getSolrField().isFacetable()) { - // solrInputDocument.addField(solrFieldFacetable, + boolean dateValid = false; + + DateTimeFormatter[] possibleFormats = { + 
DateTimeFormatter.ISO_INSTANT, + DateTimeFormatter.ofPattern("yyyy-MM-dd"), + DateTimeFormatter.ofPattern("yyyy-MM"), + DateTimeFormatter.ofPattern("yyyy") + }; + for (DateTimeFormatter format : possibleFormats){ + try { + format.parse(dateAsString); + dateValid = true; + } catch (DateTimeParseException e) { + // no-op, date is invalid + } + } + + if (!dateValid) { + logger.fine("couldn't index " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " because it's not a valid date format according to Solr"); + } else { + SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH); + try { + /** + * @todo when bean validation is working we + * won't have to convert strings into dates + */ + logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId()); + Date dateAsDate = inputDateyyyy.parse(dateAsString); + SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy"); + String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate); + logger.fine("YYYY only: " + datasetFieldFlaggedAsDate); + // solrInputDocument.addField(solrFieldSearchable, // Integer.parseInt(datasetFieldFlaggedAsDate)); - solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate); + solrInputDocument.addField(solrFieldSearchable, dateAsString); + if (dsfType.getSolrField().isFacetable()) { + // solrInputDocument.addField(solrFieldFacetable, + // Integer.parseInt(datasetFieldFlaggedAsDate)); + solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate); + } + } catch (Exception ex) { + logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")"); } - } catch (Exception ex) { - logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")"); } } } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 60bcc9f846e..493dbfcecc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -280,7 +280,7 @@ public SolrQueryResponse search( List datasetFields = datasetFieldService.findAllOrderedById(); Map solrFieldsToHightlightOnMap = new HashMap<>(); if (addHighlights) { - solrQuery.setHighlight(true).setHighlightSnippets(1); + solrQuery.setHighlight(true).setHighlightSnippets(1).setHighlightRequireFieldMatch(true); Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); if (fragSize != null) { solrQuery.setHighlightFragsize(fragSize); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrField.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrField.java index ca9805b6c57..7092a01beb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrField.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrField.java @@ -63,7 +63,7 @@ public enum SolrType { * support range queries) in * https://github.com/IQSS/dataverse/issues/370 */ - STRING("string"), TEXT_EN("text_en"), INTEGER("int"), LONG("long"), DATE("text_en"), EMAIL("text_en"); + STRING("string"), TEXT_EN("text_en"), INTEGER("plong"), FLOAT("pdouble"), DATE("date_range"), EMAIL("text_en"); private String type; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index c97762526b0..00c41073ebe 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -1303,6 +1303,327 @@ public void testGeospatialSearchInvalid() { } + @Test + public void testRangeQueries() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = 
UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Using the "astrophysics" block because it contains all field types relevant for range queries + // (int, float and date) + Response setMetadataBlocks = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken); + setMetadataBlocks.prettyPrint(); + setMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode()); + + JsonObjectBuilder datasetJson = Json.createObjectBuilder() + .add("datasetVersion", Json.createObjectBuilder() + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", "Test Astrophysics Dataset") + .add("typeClass", "primitive") + .add("multiple", false) + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", "Simpson, Homer") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "authorName")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", "hsimpson@mailinator.com") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "datasetContactEmail")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + 
Json.createObjectBuilder() + .add("value", "This is a test dataset.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "dsDescriptionValue")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Other") + ) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + .add("typeName", "subject") + ) + ) + ) + .add("astrophysics", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "coverage.Temporal") + .add("typeClass", "compound") + .add("multiple", true) + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("coverage.Temporal.StartTime", + Json.createObjectBuilder() + .add("value", "2015-01-01") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "coverage.Temporal.StartTime") + ) + ) + ) + ) + .add(Json.createObjectBuilder() + .add("typeName", "coverage.ObjectCount") + .add("typeClass", "primitive") + .add("multiple", false) + .add("value", "9000") + ) + .add(Json.createObjectBuilder() + .add("typeName", "coverage.SkyFraction") + .add("typeClass", "primitive") + .add("multiple", false) + .add("value", "0.002") + ) + ) + ) + )); + + Response createDatasetResponse = UtilIT.createDataset(dataverseAlias, datasetJson, apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId"); + + // Integer range query: Hit + Response search1 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.ObjectCount:[1000 TO 10000]", apiToken, "&show_entity_ids=true"); + search1.prettyPrint(); + search1.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(1)) + 
.body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)); + + // Integer range query: Miss + Response search2 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.ObjectCount:[* TO 1000]", apiToken); + search2.prettyPrint(); + search2.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(0)) + .body("data.count_in_response", CoreMatchers.is(0)); + + // Float range query: Hit + Response search3 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.SkyFraction:[0 TO 0.5]", apiToken, "&show_entity_ids=true"); + search3.prettyPrint(); + search3.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(1)) + .body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)); + + // Float range query: Miss + Response search4 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.SkyFraction:[0.5 TO 1]", apiToken); + search4.prettyPrint(); + search4.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(0)) + .body("data.count_in_response", CoreMatchers.is(0)); + + // Date range query: Hit + Response search5 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.Temporal.StartTime:2015", apiToken, "&show_entity_ids=true"); + search5.prettyPrint(); + search5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(1)) + .body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)); + + // Date range query: Miss + Response search6 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.Temporal.StartTime:[2020 TO *]", apiToken); + search6.prettyPrint(); + search6.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(0)) + .body("data.count_in_response", 
CoreMatchers.is(0)); + + // Combining all three range queries: Hit + Response search7 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.ObjectCount:[1000 TO 10000] AND coverage.SkyFraction:[0 TO 0.5] AND coverage.Temporal.StartTime:2015", apiToken, "&show_entity_ids=true"); + search7.prettyPrint(); + search7.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(1)) + .body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)); + + // Combining all three range queries: Miss + Response search8 = UtilIT.search("id:dataset_" + datasetId + "_draft AND coverage.ObjectCount:[* TO 1000] AND coverage.SkyFraction:[0.5 TO 1] AND coverage.Temporal.StartTime:[2020 TO *]", apiToken); + search8.prettyPrint(); + search8.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(0)) + .body("data.count_in_response", CoreMatchers.is(0)); + + } + + @Test + public void testSearchWithInvalidDateField() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response setMetadataBlocks = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation"), apiToken); + setMetadataBlocks.prettyPrint(); + setMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode()); + + // Adding a dataset with a date in the "timePeriodCoveredStart" field that doesn't match Solr's date format + // (ISO-8601 format, e.g. 
YYYY-MM-DDThh:mm:ssZ, YYYYY-MM-DD, YYYY-MM, YYYY) + // (See: https://solr.apache.org/guide/solr/latest/indexing-guide/date-formatting-math.html) + // So the date currently cannot be indexed + JsonObjectBuilder datasetJson = Json.createObjectBuilder() + .add("datasetVersion", Json.createObjectBuilder() + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", "Test Dataset") + .add("typeClass", "primitive") + .add("multiple", false) + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", "Simpson, Homer") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "authorName")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", "hsimpson@mailinator.com") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "datasetContactEmail")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + Json.createObjectBuilder() + .add("value", "This is a test dataset.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "dsDescriptionValue")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Other") + ) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + 
.add("typeName", "subject") + ) + .add(Json.createObjectBuilder() + .add("typeName", "timePeriodCovered") + .add("typeClass", "compound") + .add("multiple", true) + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("timePeriodCoveredStart", + Json.createObjectBuilder() + .add("value", "15-01-01") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "timePeriodCoveredStart") + ) + ) + ) + ) + ) + ) + )); + + Response createDatasetResponse = UtilIT.createDataset(dataverseAlias, datasetJson, apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId"); + + // When querying on the date field: miss (because the date field was skipped during indexing) + Response search1 = UtilIT.search("id:dataset_" + datasetId + "_draft AND timePeriodCoveredStart:[2000 TO 2020]", apiToken); + search1.prettyPrint(); + search1.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(0)) + .body("data.count_in_response", CoreMatchers.is(0)); + + // When querying not on the date field: the dataset can be found (only the date field was skipped during indexing, not the entire dataset) + Response search2 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true"); + search2.prettyPrint(); + search2.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.is(1)) + .body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)); + + } + @AfterEach public void tearDownDataverse() { File treesThumb = new File("scripts/search/data/binary/trees.png.thumb48");