Skip to content

Commit

Permalink
Add search explain endpoint
Browse files Browse the repository at this point in the history
The data tiering feature needs a way to indicate to the user
that a search request will be reaching into indices that
are stored in the warm tier.
Those requests are possibly slow and expensive, so we want
to inform the user about this fact.
This new endpoint can be queried from the UI with a regular search request,
before it is executed.
It returns which indices will be used for the search and whether
those indices are stored in the warm tier or not.
Furthermore it contains the timerange info for each index, so the
frontend can give the user suggestions on how to shorten the query so it
does not reach the warm tiered indices.

This kind of functionality fits perfectly into a more generic "explain"
endpoint.
So instead of adding a special endpoint for just the warm tier index
range info, we decided to build a more generic version which also
contains a preview of the generated OS/ES query and a list of search
validation errors.

Example response:
```
{
  "search_id": "647f0565d060431199a12e96",
  "search": {
    "queries": {
      "a1647eb6-a064-4fe6-b459-1e4267d3f659": {
        "search_types": {
          "22249f29-f042-4bd8-b745-252b00a35891": {
            "query_string": "{\"from\":0,\"size\":0,\"query\":{\"bool\":{\"must\":[{\"bool\":{\"filter\":[{\"match_all\":{\"boost\":1.0}},{\"bool\":{\"adjust_pure_negative\":true,\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"range\":{\"timestamp\":{\"from\":\"2023-09-11 20:55:50.185\",\"to\":\"2024-01-18 14:49:10.185\",\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"terms\":{\"streams\":[\"63d6d52ebf9c684b3da2deb3\",\"63a5ab32e71520111ed3ce06\",\"000000000000000000000001\"],\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"track_total_hits\":2147483647,\"aggregations\":{\"agg\":{\"filters\":{\"filters\":[{\"bool\":{\"should\":[{\"exists\":{\"field\":\"source\",\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}}],\"other_bucket\":true,\"other_bucket_key\":\"_other_\"},\"aggregations\":{\"agg\":{\"terms\":{\"script\":{\"source\":\"(doc.containsKey('source') && doc['source'].size() > 0\\n? doc['source'].size() > 1\\n    ? doc['source']\\n    : String.valueOf(doc['source'].value)\\n: \\\"(Empty Value)\\\")\\n\",\"lang\":\"painless\"},\"size\":10,\"min_doc_count\":1,\"shard_min_doc_count\":0,\"show_term_doc_count_error\":false,\"order\":[{\"_count\":\"desc\"},{\"_key\":\"asc\"}]}}}},\"timestamp-min\":{\"min\":{\"field\":\"timestamp\"}},\"timestamp-max\":{\"max\":{\"field\":\"timestamp\"}}}}",
            "searched_index_ranges": [
              {
                "index_name": "graylog_0",
                "begin": 0,
                "end": 0,
                "is_warm_tiered": false
              },
              {
                "index_name": "bar_1512",
                "begin": 1705589036047,
                "end": 1705589284808,
                "is_warm_tiered": false
              },
              {
                "index_name": "bar_1513",
                "begin": 0,
                "end": 0,
                "is_warm_tiered": false
              },
              {
                "index_name": "bar_warm_1511",
                "begin": 1705588785906,
                "end": 1705589035782,
                "is_warm_tiered": true
              }
            ]
          },
          "5e9a9bfe-7a97-4835-86fd-896f40b20531": {
            "query_string": "{\"from\":0,\"size\":0,\"query\":{\"bool\":{\"must\":[{\"bool\":{\"filter\":[{\"match_all\":{\"boost\":1.0}},{\"bool\":{\"adjust_pure_negative\":true,\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"range\":{\"timestamp\":{\"from\":\"2023-09-11 20:55:50.185\",\"to\":\"2024-01-18 14:49:10.185\",\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"terms\":{\"streams\":[\"63d6d52ebf9c684b3da2deb3\",\"63a5ab32e71520111ed3ce06\",\"000000000000000000000001\"],\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"track_total_hits\":2147483647,\"aggregations\":{\"agg\":{\"filters\":{\"filters\":[{\"bool\":{\"should\":[{\"exists\":{\"field\":\"source\",\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}}],\"other_bucket\":true,\"other_bucket_key\":\"_other_\"},\"aggregations\":{\"agg\":{\"terms\":{\"script\":{\"source\":\"(doc.containsKey('source') && doc['source'].size() > 0\\n? doc['source'].size() > 1\\n    ? doc['source']\\n    : String.valueOf(doc['source'].value)\\n: \\\"(Empty Value)\\\")\\n\",\"lang\":\"painless\"},\"size\":15,\"min_doc_count\":1,\"shard_min_doc_count\":0,\"show_term_doc_count_error\":false,\"order\":[{\"_count\":\"desc\"},{\"_key\":\"asc\"}]}}}},\"timestamp-min\":{\"min\":{\"field\":\"timestamp\"}},\"timestamp-max\":{\"max\":{\"field\":\"timestamp\"}}}}",
            "searched_index_ranges": [
              {
                "index_name": "graylog_0",
                "begin": 0,
                "end": 0,
                "is_warm_tiered": false
              }
            ]
          }
        }
      }
    }
  },
  "search_errors": [
  ]
}

```

Use unique class names for Search API

The API generator cannot handle identical class names under the same
endpoint.

The resulting code will fail with errors like:

```
[INFO] $ /home/marco/code/graylog-project-repos/graylog2-server/graylog2-web-interface/node_modules/.bin/tsc
[INFO] target/api/Search.ts(153,11): error TS6196: 'IndexRangeResult' is declared but never used.
```
  • Loading branch information
mpfz0r committed Feb 7, 2024
1 parent effdeda commit 89b5980
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 366 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,8 @@
*/
package org.graylog.storage.elasticsearch7.views;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import io.opentelemetry.instrumentation.annotations.WithSpan;
import jakarta.inject.Inject;
import jakarta.inject.Named;
import jakarta.inject.Provider;
import org.graylog.plugins.views.search.ExplainResults;
import org.graylog.plugins.views.search.Filter;
import org.graylog.plugins.views.search.GlobalOverride;
import org.graylog.plugins.views.search.Query;
Expand Down Expand Up @@ -62,6 +57,10 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import jakarta.inject.Inject;
import jakarta.inject.Named;
import jakarta.inject.Provider;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -219,31 +218,11 @@ public Optional<QueryBuilder> generateFilterClause(Filter filter) {
return Optional.empty();
}

@Override
public ExplainResults.QueryExplainResult doExplain(SearchJob job, Query query, ESGeneratedQueryContext queryContext) {
    // Snapshot "now" once so that every search type resolves its (possibly relative)
    // time range against the same instant; otherwise ranges could drift between types.
    final DateTime sharedNow = Tools.nowUTC();
    final Map<String, SearchSourceBuilder> generatedQueries = queryContext.searchTypeQueries();
    // Accumulates one explain result per search-type id.
    final ImmutableMap.Builder<String, ExplainResults.ExplainResult> perSearchType = ImmutableMap.builder();

    for (final var searchType : query.searchTypes()) {
        // Resolve which index ranges this search type would hit, given its
        // effective streams and effective time range.
        final Set<ExplainResults.IndexRangeResult> hitRanges = indexLookup.indexRangesForStreamsInTimeRange(
                        query.effectiveStreams(searchType),
                        query.effectiveTimeRange(searchType, sharedNow))
                .stream()
                .map(ExplainResults.IndexRangeResult::fromIndexRange)
                .collect(Collectors.toSet());

        // Preview of the generated backend query for this search type.
        final String queryPreview = generatedQueries.get(searchType.id()).toString();

        perSearchType.put(searchType.id(), new ExplainResults.ExplainResult(queryPreview, hitRanges));
    }

    return new ExplainResults.QueryExplainResult(perSearchType.build());
}

@WithSpan
@Override
public QueryResult doRun(SearchJob job, Query query, ESGeneratedQueryContext queryContext) {
if (query.searchTypes().isEmpty()) {
return org.graylog.plugins.views.search.QueryResult.builder()
return QueryResult.builder()
.query(query)
.searchTypes(Collections.emptyMap())
.errors(new HashSet<>(queryContext.errors()))
Expand Down Expand Up @@ -322,7 +301,7 @@ public QueryResult doRun(SearchJob job, Query query, ESGeneratedQueryContext que
}

LOG.debug("Query {} ran for job {}", query.id(), job.getId());
return org.graylog.plugins.views.search.QueryResult.builder()
return QueryResult.builder()
.query(query)
.searchTypes(resultsMap)
.errors(new HashSet<>(queryContext.errors()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.jayway.jsonpath.JsonPath;
import jakarta.inject.Provider;
import org.graylog.plugins.views.search.Query;
import org.graylog.plugins.views.search.QueryResult;
import org.graylog.plugins.views.search.Search;
Expand All @@ -29,73 +27,49 @@
import org.graylog.plugins.views.search.elasticsearch.ElasticsearchQueryString;
import org.graylog.plugins.views.search.elasticsearch.FieldTypesLookup;
import org.graylog.plugins.views.search.elasticsearch.IndexLookup;
import org.graylog.plugins.views.search.engine.GeneratedQueryContext;
import org.graylog.plugins.views.search.engine.monitoring.collection.NoOpStatsCollector;
import org.graylog.plugins.views.search.searchfilters.db.UsedSearchFiltersToQueryStringsMapper;
import org.graylog.plugins.views.search.searchfilters.model.InlineQueryStringSearchFilter;
import org.graylog.plugins.views.search.searchfilters.model.ReferencedQueryStringSearchFilter;
import org.graylog.plugins.views.search.searchfilters.model.UsedSearchFilter;
import org.graylog.plugins.views.search.searchtypes.MessageList;
import org.graylog.plugins.views.search.searchtypes.pivot.Pivot;
import org.graylog.plugins.views.search.searchtypes.pivot.buckets.AutoInterval;
import org.graylog.plugins.views.search.searchtypes.pivot.buckets.Time;
import org.graylog.shaded.elasticsearch7.org.elasticsearch.index.query.BoolQueryBuilder;
import org.graylog.shaded.elasticsearch7.org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.graylog.shaded.elasticsearch7.org.elasticsearch.index.query.QueryBuilder;
import org.graylog.shaded.elasticsearch7.org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.graylog.storage.elasticsearch7.views.searchtypes.ESMessageList;
import org.graylog.storage.elasticsearch7.views.searchtypes.ESSearchTypeHandler;
import org.graylog.storage.elasticsearch7.views.searchtypes.pivot.ESPivot;
import org.graylog.storage.elasticsearch7.views.searchtypes.pivot.EffectiveTimeRangeExtractor;
import org.graylog.storage.elasticsearch7.views.searchtypes.pivot.buckets.ESTimeHandler;
import org.graylog.testing.jsonpath.JsonPathAssert;
import org.graylog2.indexer.ranges.MongoIndexRange;
import org.graylog2.plugin.indexer.searches.timeranges.AbsoluteRange;
import org.graylog2.plugin.indexer.searches.timeranges.RelativeRange;
import org.graylog2.plugin.indexer.searches.timeranges.TimeRange;
import org.joda.time.DateTime;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnit;
import org.mockito.junit.MockitoRule;

import jakarta.inject.Provider;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.assertj.core.api.Assertions.assertThat;
import static org.graylog2.plugin.Tools.nowUTC;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anySet;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;


public class ElasticsearchBackendTest {
@Rule
public final MockitoRule mockitoRule = MockitoJUnit.rule();
private ElasticsearchBackend backend;
private UsedSearchFiltersToQueryStringsMapper usedSearchFiltersToQueryStringsMapper;

@Mock
private IndexLookup indexLookup;

@Before
public void setup() {
Map<String, Provider<ESSearchTypeHandler<? extends SearchType>>> handlers = Maps.newHashMap();
handlers.put(MessageList.NAME, ESMessageList::new);
handlers.put(Pivot.NAME, () -> new ESPivot(Map.of(Time.NAME, new ESTimeHandler()), Map.of(), new EffectiveTimeRangeExtractor()));

usedSearchFiltersToQueryStringsMapper = mock(UsedSearchFiltersToQueryStringsMapper.class);
doReturn(Collections.emptySet()).when(usedSearchFiltersToQueryStringsMapper).map(any());
final FieldTypesLookup fieldTypesLookup = mock(FieldTypesLookup.class);
backend = new ElasticsearchBackend(handlers,
null,
indexLookup,
mock(IndexLookup.class),
(elasticsearchBackend, ssb, errors) -> new ESGeneratedQueryContext(elasticsearchBackend, ssb, errors, fieldTypesLookup),
usedSearchFiltersToQueryStringsMapper,
new NoOpStatsCollector<>(),
Expand Down Expand Up @@ -167,64 +141,4 @@ public void generatedContextHasQueryThatIncludesSearchFilters() {
.contains("method:POST")
.contains("method:GET");
}

@Test
public void testExplain() {
    // Stub the index lookup: any query whose "from" timestamp is before 2024 sees
    // three index ranges (one of them named like a warm-tier index); anything newer
    // sees only "graylog_0". This lets the two search types below resolve to
    // different index sets.
    when(indexLookup.indexRangesForStreamsInTimeRange(anySet(), any())).thenAnswer(a -> {
        if (a.getArgument(1, TimeRange.class).getFrom().getYear() < 2024) {
            return Set.of(
                    MongoIndexRange.create("graylog_0", nowUTC(), nowUTC(), nowUTC(), 0),
                    MongoIndexRange.create("graylog_1", nowUTC(), nowUTC(), nowUTC(), 0),
                    MongoIndexRange.create("graylog_warm_2", nowUTC(), nowUTC(), nowUTC(), 0)
            );
        }
        return Set.of(MongoIndexRange.create("graylog_0", nowUTC(), nowUTC(), nowUTC(), 0));
    });

    // One query with two search types: a message list that inherits the query's
    // 300-second relative range (→ "recent", single index) and a pivot with its own
    // absolute 2016–2022 range (→ pre-2024, three indices per the stub above).
    final Query query = Query.builder()
            .id("query1")
            .query(ElasticsearchQueryString.of("needle"))
            .searchTypes(Set.of(
                    MessageList.builder()
                            .id("messagelist-1")
                            .build(),
                    Pivot.builder()
                            .id("pivot-1")
                            .rowGroups(Time.builder().field("source").interval(AutoInterval.create()).build())
                            .timerange(AbsoluteRange.create(DateTime.parse("2016-05-19T00:00:00.000Z"), DateTime.parse("2022-01-09T00:00:00.000Z")))
                            .series()
                            .rollup(false)
                            .build()
                    )
            )
            .timerange(RelativeRange.create(300))
            .build();
    final Search search = Search.builder().queries(ImmutableSet.of(query)).build();
    final SearchJob job = new SearchJob("deadbeef", search, "admin");
    final GeneratedQueryContext generatedQueryContext = backend.generate(query, Set.of());

    var explainResult = backend.explain(job, query, generatedQueryContext);
    assertThat(explainResult.searchTypes()).isNotNull();
    // Message list: recent range → only "graylog_0"; its generated query preview
    // must contain the user's query string.
    assertThat(explainResult.searchTypes().get("messagelist-1")).satisfies(ml -> {
        assertThat(ml).isNotNull();

        assertThat(ml.searchedIndexRanges()).hasSize(1);
        assertThat(ml.searchedIndexRanges()).allMatch(r -> r.indexName().equals("graylog_0"));


        var ctx = JsonPath.parse(ml.queryString());
        JsonPathAssert.assertThat(ctx).jsonPathAsString("$.query.bool.must[0].bool.filter[0].query_string.query").isEqualTo("needle");
    });

    // Pivot: pre-2024 range → all three stubbed indices, and the warm-tier flag is
    // set for "graylog_warm_2" only (presumably derived from the index's tier by the
    // backend — NOTE(review): confirm how isWarmTiered is computed).
    assertThat(explainResult.searchTypes().get("pivot-1")).satisfies(ml -> {
        assertThat(ml).isNotNull();
        assertThat(ml.searchedIndexRanges()).hasSize(3);
        assertThat(ml.searchedIndexRanges()).anyMatch(r -> r.indexName().equals("graylog_0") && !r.isWarmTiered());
        assertThat(ml.searchedIndexRanges()).anyMatch(r -> r.indexName().equals("graylog_warm_2") && r.isWarmTiered());

        var ctx = JsonPath.parse(ml.queryString());
        JsonPathAssert.assertThat(ctx).jsonPathAsString("$.query.bool.must[0].bool.filter[0].query_string.query").isEqualTo("needle");
        JsonPathAssert.assertThat(ctx).jsonPathAsString("$.aggregations.agg.date_histogram.field").isEqualTo("source");
    });
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
*/
package org.graylog.storage.opensearch2.views;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import io.opentelemetry.instrumentation.annotations.WithSpan;
import jakarta.inject.Inject;
import jakarta.inject.Named;
import jakarta.inject.Provider;
import org.graylog.plugins.views.search.ExplainResults;
import org.graylog.plugins.views.search.Filter;
import org.graylog.plugins.views.search.GlobalOverride;
import org.graylog.plugins.views.search.Query;
Expand Down Expand Up @@ -171,7 +169,7 @@ public OSGeneratedQueryContext generate(Query query, Set<SearchError> validation

if (effectiveStreamIds.stream().noneMatch(s -> s.startsWith(Stream.DATASTREAM_PREFIX))) {
searchTypeOverrides
.must(QueryBuilders.termsQuery(Message.FIELD_STREAMS, effectiveStreamIds));
.must(QueryBuilders.termsQuery(Message.FIELD_STREAMS, effectiveStreamIds));
}

searchType.query().ifPresent(searchTypeQuery -> {
Expand Down Expand Up @@ -222,26 +220,6 @@ public Optional<QueryBuilder> generateFilterClause(Filter filter) {
return Optional.empty();
}

@Override
public ExplainResults.QueryExplainResult doExplain(SearchJob job, Query query, OSGeneratedQueryContext queryContext) {
    // Use a single shared "now" so all search types interpret their (possibly
    // relative) time ranges consistently within this one explain call.
    final DateTime sharedNow = Tools.nowUTC();
    final Map<String, SearchSourceBuilder> generatedQueries = queryContext.searchTypeQueries();
    // One explain entry per search-type id.
    final ImmutableMap.Builder<String, ExplainResults.ExplainResult> perSearchType = ImmutableMap.builder();

    for (final var searchType : query.searchTypes()) {
        // Which index ranges would this search type touch, given its effective
        // streams and effective time range?
        final Set<ExplainResults.IndexRangeResult> hitRanges = indexLookup.indexRangesForStreamsInTimeRange(
                        query.effectiveStreams(searchType),
                        query.effectiveTimeRange(searchType, sharedNow))
                .stream()
                .map(ExplainResults.IndexRangeResult::fromIndexRange)
                .collect(Collectors.toSet());

        // Preview of the generated backend query for this search type.
        final String queryPreview = generatedQueries.get(searchType.id()).toString();

        perSearchType.put(searchType.id(), new ExplainResults.ExplainResult(queryPreview, hitRanges));
    }

    return new ExplainResults.QueryExplainResult(perSearchType.build());
}

@Override
@WithSpan
public QueryResult doRun(SearchJob job, Query query, OSGeneratedQueryContext queryContext) {
Expand Down
Loading

0 comments on commit 89b5980

Please sign in to comment.