From c54fcc20a0b46dea5770066b01c9fb5831214d93 Mon Sep 17 00:00:00 2001 From: Krzysztof Kiewicz Date: Sun, 5 Jan 2025 17:01:44 +0100 Subject: [PATCH] `ip_range`: support for `ipv6` (#1157) --- docs/public/docs/limitations.md | 2 +- quesma/go.mod | 1 + quesma/go.sum | 6 +- quesma/model/README.md | 2 +- quesma/model/bucket_aggregations/ip_range.go | 28 ++-- .../pancake_aggregation_parser_buckets.go | 32 ++++- .../kibana-visualize/aggregation_requests.go | 127 ++++++++++++++++-- 7 files changed, 170 insertions(+), 28 deletions(-) diff --git a/docs/public/docs/limitations.md b/docs/public/docs/limitations.md index 87ca9a913..66b00acef 100644 --- a/docs/public/docs/limitations.md +++ b/docs/public/docs/limitations.md @@ -34,7 +34,7 @@ Currently supported: including: `boolean`, `match`, `match phrase`, `multi-match`, `query string`, `nested`, `match all`, `exists`, `prefix`, `range`, `term`, `terms`, `wildcard` - most popular [Aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html), including: `avg`, `cardinality`, `max`, `min`, `percentile ranks`, `percentiles`, `stats`, `sum`, `top hits`, `top metrics`, `value counts`, - `date histogram`, `date range`, `filter`, `filters`, `histogram`, `range`, `singificant terms`, `terms`, `ip prefix` + `date histogram`, `date range`, `filter`, `filters`, `histogram`, `range`, `singificant terms`, `terms`, `ip prefix`, `ip range` Which as a result allows you to run Kibana/OSD queries and dashboards on data residing in ClickHouse/Hydrolix. diff --git a/quesma/go.mod b/quesma/go.mod index 055da1e49..cae03c989 100644 --- a/quesma/go.mod +++ b/quesma/go.mod @@ -41,6 +41,7 @@ require ( require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/H0llyW00dzZ/cidr v1.2.1 // indirect + github.com/apparentlymart/go-cidr v1.1.0 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect diff --git a/quesma/go.sum b/quesma/go.sum index 39f2d9293..2254fcd1d 100644 --- a/quesma/go.sum +++ b/quesma/go.sum @@ -7,15 +7,17 @@ github.com/ClickHouse/clickhouse-go/v2 v2.30.0 h1:AG4D/hW39qa58+JHQIFOSnxyL46H6h github.com/ClickHouse/clickhouse-go/v2 v2.30.0/go.mod h1:i9ZQAojcayW3RsdCb3YR+n+wC2h65eJsZCscZ1Z1wyo= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= -github.com/H0llyW00dzZ/cidr v1.2.1 h1:DfRHX+RqVVKZijQGO1aJSaWvN9Saan8sycK/4wrfY5g= -github.com/H0llyW00dzZ/cidr v1.2.1/go.mod h1:S+EgYkMandSAN27mGNG/CB3jeoXDAyalsvvVFpWdnXc= github.com/DataDog/go-sqllexer v0.0.18 h1:ErBvoO7/srJLdA2ebwd+HPqD4g1kN++BP64A8qvmh9U= github.com/DataDog/go-sqllexer v0.0.18/go.mod h1:KwkYhpFEVIq+BfobkTC1vfqm4gTi65skV/DpDBXtexc= +github.com/H0llyW00dzZ/cidr v1.2.1 h1:DfRHX+RqVVKZijQGO1aJSaWvN9Saan8sycK/4wrfY5g= +github.com/H0llyW00dzZ/cidr v1.2.1/go.mod h1:S+EgYkMandSAN27mGNG/CB3jeoXDAyalsvvVFpWdnXc= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= +github.com/apparentlymart/go-cidr v1.1.0 h1:2mAhrMoF+nhXqxTzSZMUzDHkLjmIHC+Zzn4tdgBZjnU= +github.com/apparentlymart/go-cidr v1.1.0/go.mod h1:EBcsNrHc3zQeuaeCeCtQruQm+n9/YjEn/vI25Lg7Gwc= github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0= github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= diff --git a/quesma/model/README.md b/quesma/model/README.md index 70564f204..837567c89 100644 --- a/quesma/model/README.md +++ b/quesma/model/README.md @@ -28,7 +28,7 @@ More info: https://www.elastic.co/guide/en/elasticsearch/reference/current/searc Median absolute deviation | :x: | Global | :x: | Moving function | :wavy_dash: | Min | :white_check_mark: | Histogram | :white_check_mark: | Moving percentiles | :x: | Percentile ranks | :white_check_mark: | IP prefix | :white_check_mark: | Normalize | :x: | - Percentiles | :white_check_mark: | IP range | :x: | Percentiles bucket | :x: | + Percentiles | :white_check_mark: | IP range | :white_check_mark: | Percentiles bucket | :x: | Rate | :x: | Missing | :x: | Serial differencing | :white_check_mark: | Scripted metric | :x: | Multi-terms | :white_check_mark: | Stats bucket | :x: | Stats | :white_check_mark: | Nested | :x: | Sum bucket | :white_check_mark: | diff --git a/quesma/model/bucket_aggregations/ip_range.go b/quesma/model/bucket_aggregations/ip_range.go index e34bdbd1f..5befd892b 100644 --- a/quesma/model/bucket_aggregations/ip_range.go +++ b/quesma/model/bucket_aggregations/ip_range.go @@ -5,6 +5,7 @@ package bucket_aggregations import ( "context" "fmt" + "net/netip" "quesma/logger" "quesma/model" "reflect" @@ -14,8 +15,6 @@ import ( // So instead of "<= 255.255.255.255", it uses "< ::1:0:0:0" const BiggestIpv4 = "::1:0:0:0" -// Current limitation: we expect Clickhouse field to be IPv4 (and not IPv6) - // Clickhouse table to test SQLs: // CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log // INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67') @@ -95,23 +94,34 @@ func NewIpInterval(begin, end string, key *string) IpInterval { } func (interval IpInterval) ToWhereClause(field model.Expr) model.Expr { - isBegin := interval.begin != UnboundedInterval - isEnd := interval.end != UnboundedInterval && interval.end != BiggestIpv4 + hasBegin := interval.hasBeginInResponse() + hasEnd := interval.hasEndInResponse() begin := model.NewInfixExpr(field, ">=", model.NewLiteralSingleQuoteString(interval.begin)) end := model.NewInfixExpr(field, "<", model.NewLiteralSingleQuoteString(interval.end)) - if isBegin && isEnd { + if hasBegin && hasEnd { return model.NewInfixExpr(begin, "AND", end) - } else if isBegin { + } else if hasBegin { return begin - } else if isEnd { + } else if hasEnd { return end } else { return model.TrueExpr } } +// hasBeginInResponse returns true if we should add 'from' field to the response. +// We do that <=> begin is not 0.0.0.0 (unbounded) +func (interval IpInterval) hasBeginInResponse() bool { + return interval.begin != UnboundedInterval && netip.MustParseAddr(interval.begin) != netip.MustParseAddr("::") +} + +// hasEndInResponse returns true if we should add 'to' field to the response. +func (interval IpInterval) hasEndInResponse() bool { + return interval.end != UnboundedInterval +} + // String returns key part of the response, e.g. "1.0-2.0", or "*-6.55" func (interval IpInterval) String() string { if interval.key != nil { @@ -166,10 +176,10 @@ func (query *IpRange) CombinatorTranslateSqlResponseToJson(subGroup CombinatorGr } interval := query.intervals[subGroup.idx] - if interval.begin != UnboundedInterval { + if interval.hasBeginInResponse() { response["from"] = interval.begin } - if interval.end != UnboundedInterval { + if interval.hasEndInResponse() { response["to"] = interval.end } diff --git a/quesma/queryparser/pancake_aggregation_parser_buckets.go b/quesma/queryparser/pancake_aggregation_parser_buckets.go index 4b65d0319..f33f0eab0 100644 --- a/quesma/queryparser/pancake_aggregation_parser_buckets.go +++ b/quesma/queryparser/pancake_aggregation_parser_buckets.go @@ -6,9 +6,11 @@ package queryparser import ( "fmt" "github.com/H0llyW00dzZ/cidr" + cidr2 "github.com/apparentlymart/go-cidr/cidr" "github.com/pkg/errors" "math" "net" + "net/netip" "quesma/clickhouse" "quesma/logger" "quesma/model" @@ -410,23 +412,39 @@ func (cw *ClickhouseQueryTranslator) parseIpRange(aggregation *pancakeAggregatio rangesRaw := params["ranges"].([]any) ranges := make([]bucket_aggregations.IpInterval, 0, len(rangesRaw)) for _, rangeRaw := range rangesRaw { + var begin, end string var key *string if keyIfPresent, exists := cw.parseStringFieldExistCheck(rangeRaw.(QueryMap), "key"); exists { key = &keyIfPresent } - var begin, end string if maskIfExists, exists := cw.parseStringFieldExistCheck(rangeRaw.(QueryMap), "mask"); exists { _, ipNet, err := net.ParseCIDR(maskIfExists) if err != nil { return err } - beginAsInt, endAsInt := cidr.IPv4ToRange(ipNet) - begin = util.IntToIpv4(beginAsInt) - // endAsInt is inclusive, we do +1, because we need it exclusive - if endAsInt != math.MaxUint32 { - end = util.IntToIpv4(endAsInt + 1) + if ipNet.IP.To4() != nil { + // it's ipv4 + beginAsInt, endAsInt := cidr.IPv4ToRange(ipNet) + begin = util.IntToIpv4(beginAsInt) + // endAsInt is inclusive, we do +1, because we need it exclusive + if endAsInt != math.MaxUint32 { + end = util.IntToIpv4(endAsInt + 1) + } else { + end = bucket_aggregations.BiggestIpv4 // "255.255.255.255 + 1", so to say (value in compliance with Elastic) + } + } else if ipNet.IP.To16() != nil { + // it's ipv6 + beginInclusive, endInclusive := cidr2.AddressRange(ipNet) + begin = beginInclusive.String() + // we do +1 (.Next()), because we need end to be exclusive + endExclusive := netip.MustParseAddr(endInclusive.String()).Next() + if endExclusive.IsValid() { + end = endExclusive.String() + } else { // invalid means endInclusive was already the biggest possible value (ff...ff) + end = bucket_aggregations.UnboundedInterval + } } else { - end = bucket_aggregations.BiggestIpv4 // "255.255.255.255 + 1", so to say (value in compliance with Elastic) + return fmt.Errorf("invalid mask: %s", maskIfExists) } if key == nil { key = &maskIfExists diff --git a/quesma/testdata/kibana-visualize/aggregation_requests.go b/quesma/testdata/kibana-visualize/aggregation_requests.go index 23c393893..fc207cb37 100644 --- a/quesma/testdata/kibana-visualize/aggregation_requests.go +++ b/quesma/testdata/kibana-visualize/aggregation_requests.go @@ -3375,9 +3375,6 @@ var AggregationTests = []testdata.AggregationTestCase{ TestName: "IP range, with ranges as CIDR masks. In Kibana: Add panel > Aggregation Based > Area. Buckets: X-asis: IP Range", QueryRequestJson: ` { - "_source": { - "excludes": [] - }, "aggs": { "2": { "ip_range": { @@ -3451,7 +3448,7 @@ var AggregationTests = []testdata.AggregationTestCase{ }}, }, ExpectedPancakeSQL: ` - SELECT countIf("clientip">='255.255.255.252') AS "range_0__aggr__2__count", + SELECT countIf(("clientip">='255.255.255.252' AND "clientip"<'::1:0:0:0')) AS "range_0__aggr__2__count", countIf("clientip">='128.129.130.131') AS "range_1__aggr__2__count", countIf(("clientip">='10.0.7.96' AND "clientip"<'10.0.7.128')) AS "range_2__aggr__2__count" @@ -3461,9 +3458,6 @@ var AggregationTests = []testdata.AggregationTestCase{ TestName: "IP range, with ranges as CIDR masks, keyed=true. In Kibana: Add panel > Aggregation Based > Area. Buckets: X-asis: IP Range", QueryRequestJson: ` { - "_source": { - "excludes": [] - }, "aggs": { "2": { "ip_range": { @@ -3535,10 +3529,127 @@ var AggregationTests = []testdata.AggregationTestCase{ }}, }, ExpectedPancakeSQL: ` - SELECT countIf("clientip">='255.255.255.254') AS "range_0__aggr__2__count", + SELECT countIf(("clientip">='255.255.255.254' AND "clientip"<'::1:0:0:0')) AS "range_0__aggr__2__count", countIf("clientip">='128.129.130.131') AS "range_1__aggr__2__count", countIf(("clientip">='10.0.7.96' AND "clientip"<'10.0.7.128')) AS "range_2__aggr__2__count" FROM __quesma_table_name`, }, + { // [27] + TestName: "IP range ipv6", + QueryRequestJson: ` + { + "aggs": { + "2": { + "ip_range": { + "field": "clientip", + "ranges": [ + { + "from": "1::132:13:21:23:122:22" + }, + { + "to": "1::132:13:21:23:122:22" + }, + { + "to": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" + } + ] + } + } + }, + "size": 0, + "track_total_hits": true + }`, + ExpectedResponse: ` + { + "aggregations": { + "2": { + "buckets": [ + { + "key": "1::132:13:21:23:122:22-*", + "from": "1::132:13:21:23:122:22", + "doc_count": 7290 + }, + { + "key": "*-1::132:13:21:23:122:22", + "to": "1::132:13:21:23:122:22", + "doc_count": 6784 + }, + { + "key": "*-ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", + "to": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", + "doc_count": 999999 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("range_0__aggr__2__count", int64(7290)), + model.NewQueryResultCol("range_1__aggr__2__count", int64(6784)), + model.NewQueryResultCol("range_2__aggr__2__count", int64(999999)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT countIf("clientip">='1::132:13:21:23:122:22') AS + "range_0__aggr__2__count", + countIf("clientip"<'1::132:13:21:23:122:22') AS "range_1__aggr__2__count", + countIf("clientip"<'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff') AS + "range_2__aggr__2__count" + FROM __quesma_table_name`, + }, + { // [28] + TestName: "IP range ipv6 with mask", + QueryRequestJson: ` + { + "aggs": { + "2": { + "ip_range": { + "field": "clientip", + "ranges": [ + { + "mask": "::/2" + }, + { + "mask": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127" + } + ] + } + } + }, + "size": 0, + "track_total_hits": true + }`, + ExpectedResponse: ` + { + "aggregations": { + "2": { + "buckets": [ + { + "key": "::/2", + "to": "4000::", + "doc_count": 1 + }, + { + "key": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127", + "from": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", + "doc_count": 0 + } + ] + } + } + }`, + ExpectedPancakeResults: []model.QueryResultRow{ + {Cols: []model.QueryResultCol{ + model.NewQueryResultCol("range_0__aggr__2__count", int64(1)), + model.NewQueryResultCol("range_1__aggr__2__count", int64(0)), + }}, + }, + ExpectedPancakeSQL: ` + SELECT countIf("clientip"<'4000::') AS "range_0__aggr__2__count", + countIf("clientip">='ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe') AS + "range_1__aggr__2__count" + FROM __quesma_table_name`, + }, }