-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rewrite _resolve endpoint to use the schema registry (#520)
Before this change, the `_resolve` endpoint had several bugs. One of them, which affected me, was a wrong result for an index present in Elastic but missing from ClickHouse and not enabled in the config. In that case Quesma should reach out to Elastic, but previously it only looked at ClickHouse and immediately returned 404. Since the previous logic was quite convoluted and used the older `LogManager` instead of `schema.Registry`, this PR largely rewrites the implementation to be simpler, better documented, and to use `schema.Registry`. I extensively tested all possible scenarios (see the table in `Test_combineSourcesFromElasticWithRegistry`), both as a unit test and manually in Kibana. The resolve code is now also moved to the `functionality/resolve` package. To make it possible to use `schema.Registry` in the new code, `schema.Schema` had to be extended with a new boolean flag, `ExistsInDataSource`. Before this flag was added, the schema registry contained both schemas without a table created in ClickHouse (only specified/enabled in the configuration) and schemas with a table created in ClickHouse, with no easy way to differentiate between them.
- Loading branch information
1 parent
4871087
commit d77d241
Showing
7 changed files
with
234 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// Copyright Quesma, licensed under the Elastic License 2.0. | ||
// SPDX-License-Identifier: Elastic-2.0 | ||
package resolve | ||
|
||
import ( | ||
"quesma/elasticsearch" | ||
"quesma/quesma/config" | ||
"quesma/schema" | ||
"slices" | ||
) | ||
|
||
func HandleResolve(pattern string, sr schema.Registry, cfg config.QuesmaConfiguration) (elasticsearch.Sources, error) { | ||
// In the _resolve endpoint we want to combine the results from both schema.Registry and Elasticsearch | ||
|
||
normalizedPattern := elasticsearch.NormalizePattern(pattern) | ||
|
||
// Optimization: if it's not a pattern, let's try avoiding querying Elasticsearch - let's first try | ||
// finding that index in schema.Registry: | ||
if !elasticsearch.IsIndexPattern(normalizedPattern) { | ||
if foundSchema, found := sr.FindSchema(schema.TableName(normalizedPattern)); found { | ||
if !foundSchema.ExistsInDataSource { | ||
// index configured by the user, but not present in the data source | ||
return elasticsearch.Sources{}, nil | ||
} | ||
|
||
return elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{ | ||
{ | ||
Name: normalizedPattern, | ||
BackingIndices: []string{normalizedPattern}, | ||
TimestampField: `@timestamp`, | ||
}, | ||
}, | ||
}, nil | ||
} | ||
|
||
// ...index not found in schema.Registry (meaning the user did not configure it), but it could exist in Elastic | ||
} | ||
|
||
// Combine results from both schema.Registry and Elasticsearch: | ||
|
||
// todo avoid creating new instances all the time | ||
sourcesFromElastic, _, err := elasticsearch.NewIndexResolver(cfg.Elasticsearch.Url.String()).Resolve(normalizedPattern) | ||
if err != nil { | ||
return elasticsearch.Sources{}, err | ||
} | ||
|
||
combineSourcesFromElasticWithRegistry(&sourcesFromElastic, sr.AllSchemas(), normalizedPattern) | ||
return sourcesFromElastic, nil | ||
} | ||
|
||
func combineSourcesFromElasticWithRegistry(sourcesFromElastic *elasticsearch.Sources, schemas map[schema.TableName]schema.Schema, normalizedPattern string) { | ||
sourcesFromElastic.Indices = | ||
slices.DeleteFunc(sourcesFromElastic.Indices, func(i elasticsearch.Index) bool { | ||
_, exists := schemas[schema.TableName(i.Name)] | ||
return exists | ||
}) | ||
sourcesFromElastic.DataStreams = slices.DeleteFunc(sourcesFromElastic.DataStreams, func(i elasticsearch.DataStream) bool { | ||
_, exists := schemas[schema.TableName(i.Name)] | ||
return exists | ||
}) | ||
|
||
for name, currentSchema := range schemas { | ||
indexName := name.AsString() | ||
|
||
if config.MatchName(normalizedPattern, indexName) && currentSchema.ExistsInDataSource { | ||
sourcesFromElastic.DataStreams = append(sourcesFromElastic.DataStreams, elasticsearch.DataStream{ | ||
Name: indexName, | ||
BackingIndices: []string{indexName}, | ||
TimestampField: `@timestamp`, | ||
}) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// Copyright Quesma, licensed under the Elastic License 2.0. | ||
// SPDX-License-Identifier: Elastic-2.0 | ||
package resolve | ||
|
||
import ( | ||
"github.com/stretchr/testify/assert" | ||
"quesma/elasticsearch" | ||
"quesma/schema" | ||
"testing" | ||
) | ||
|
||
func Test_combineSourcesFromElasticWithRegistry(t *testing.T) { | ||
// Expected behavior: | ||
// | ||
// # | In Elastic? | Exists in data source? | Enabled in the config (= present in schema.Registry)? | Quesma response | ||
// 1 | NO | NO | NO | Not exists | ||
// 2 | NO | NO | YES | Not exists | ||
// 3 | YES | NO | NO | Exists | ||
// 4 | YES | NO | YES | Not exist | ||
// 5 | NO | YES | NO | Not exist | ||
// 6 | NO | YES | YES | Exists | ||
// 7 | YES | YES | NO | Exists | ||
// 8 | YES | YES | YES | Exists | ||
|
||
tests := []struct { | ||
name string | ||
sourcesFromElastic elasticsearch.Sources | ||
schemas map[schema.TableName]schema.Schema | ||
normalizedPattern string | ||
expectedResult elasticsearch.Sources | ||
}{ | ||
// Cases 1, 3 (index1), 5, 7 (index1): | ||
{ | ||
name: "index not enabled in config, some unrelated index in Elastic", | ||
sourcesFromElastic: elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{{Name: "index1"}}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{}, | ||
}, | ||
schemas: map[schema.TableName]schema.Schema{}, // schema.Registry won't contain disabled indexes, even if they exist in the data source (manually created by the user) | ||
normalizedPattern: "index*", | ||
expectedResult: elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{{Name: "index1"}}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{}, | ||
}, | ||
}, | ||
// Cases 2 (index2), 4 (index1): | ||
{ | ||
name: "index enabled in config, not present in the data source; decoy index in Elastic", | ||
sourcesFromElastic: elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{{Name: "index1"} /* decoy */, {Name: "index3"}}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{}, | ||
}, | ||
schemas: map[schema.TableName]schema.Schema{ | ||
"index1": schema.Schema{ExistsInDataSource: false}, | ||
"index2": schema.Schema{ExistsInDataSource: false}, | ||
"quesma": schema.Schema{ExistsInDataSource: true}, | ||
}, | ||
normalizedPattern: "index*", | ||
expectedResult: elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{{Name: "index3"}}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{}, | ||
}, | ||
}, | ||
// Cases 6 (index2), 8 (index1, index3): | ||
{ | ||
name: "index enabled in config, present in the data source", | ||
sourcesFromElastic: elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{{Name: "index1"}, {Name: "index4"}}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{{Name: "index3"}, {Name: "index5"}}, | ||
}, | ||
schemas: map[schema.TableName]schema.Schema{ | ||
"index1": schema.Schema{ExistsInDataSource: true}, | ||
"index2": schema.Schema{ExistsInDataSource: true}, | ||
"index3": schema.Schema{ExistsInDataSource: true}, | ||
"quesma": schema.Schema{ExistsInDataSource: true}, | ||
}, | ||
normalizedPattern: "index*", | ||
expectedResult: elasticsearch.Sources{ | ||
Indices: []elasticsearch.Index{{Name: "index4"}}, | ||
Aliases: []elasticsearch.Alias{}, | ||
DataStreams: []elasticsearch.DataStream{ | ||
{Name: "index5"}, | ||
{Name: "index1", BackingIndices: []string{"index1"}, TimestampField: `@timestamp`}, | ||
{Name: "index2", BackingIndices: []string{"index2"}, TimestampField: `@timestamp`}, | ||
{Name: "index3", BackingIndices: []string{"index3"}, TimestampField: `@timestamp`}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
combineSourcesFromElasticWithRegistry(&tt.sourcesFromElastic, tt.schemas, tt.normalizedPattern) | ||
assert.ElementsMatchf(t, tt.sourcesFromElastic.Aliases, tt.expectedResult.Aliases, "Aliases don't match") | ||
assert.ElementsMatchf(t, tt.sourcesFromElastic.Indices, tt.expectedResult.Indices, "Indices don't match") | ||
assert.ElementsMatchf(t, tt.sourcesFromElastic.DataStreams, tt.expectedResult.DataStreams, "DataStreams don't match") | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.