Skip to content

Commit

Permalink
ip_range: support for ipv6 (#1157)
Browse files Browse the repository at this point in the history
  • Loading branch information
trzysiek authored Jan 5, 2025
1 parent e52d81d commit c54fcc2
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 28 deletions.
2 changes: 1 addition & 1 deletion docs/public/docs/limitations.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Currently supported:
including: `boolean`, `match`, `match phrase`, `multi-match`, `query string`, `nested`, `match all`, `exists`, `prefix`, `range`, `term`, `terms`, `wildcard`
- most popular [Aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html),
including: `avg`, `cardinality`, `max`, `min`, `percentile ranks`, `percentiles`, `stats`, `sum`, `top hits`, `top metrics`, `value counts`,
`date histogram`, `date range`, `filter`, `filters`, `histogram`, `range`, `significant terms`, `terms`, `ip prefix`
`date histogram`, `date range`, `filter`, `filters`, `histogram`, `range`, `significant terms`, `terms`, `ip prefix`, `ip range`

Which as a result allows you to run Kibana/OSD queries and dashboards on data residing in ClickHouse/Hydrolix.

Expand Down
1 change: 1 addition & 0 deletions quesma/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ require (
require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/H0llyW00dzZ/cidr v1.2.1 // indirect
github.com/apparentlymart/go-cidr v1.1.0 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
Expand Down
6 changes: 4 additions & 2 deletions quesma/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@ github.com/ClickHouse/clickhouse-go/v2 v2.30.0 h1:AG4D/hW39qa58+JHQIFOSnxyL46H6h
github.com/ClickHouse/clickhouse-go/v2 v2.30.0/go.mod h1:i9ZQAojcayW3RsdCb3YR+n+wC2h65eJsZCscZ1Z1wyo=
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/H0llyW00dzZ/cidr v1.2.1 h1:DfRHX+RqVVKZijQGO1aJSaWvN9Saan8sycK/4wrfY5g=
github.com/H0llyW00dzZ/cidr v1.2.1/go.mod h1:S+EgYkMandSAN27mGNG/CB3jeoXDAyalsvvVFpWdnXc=
github.com/DataDog/go-sqllexer v0.0.18 h1:ErBvoO7/srJLdA2ebwd+HPqD4g1kN++BP64A8qvmh9U=
github.com/DataDog/go-sqllexer v0.0.18/go.mod h1:KwkYhpFEVIq+BfobkTC1vfqm4gTi65skV/DpDBXtexc=
github.com/H0llyW00dzZ/cidr v1.2.1 h1:DfRHX+RqVVKZijQGO1aJSaWvN9Saan8sycK/4wrfY5g=
github.com/H0llyW00dzZ/cidr v1.2.1/go.mod h1:S+EgYkMandSAN27mGNG/CB3jeoXDAyalsvvVFpWdnXc=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
github.com/apparentlymart/go-cidr v1.1.0 h1:2mAhrMoF+nhXqxTzSZMUzDHkLjmIHC+Zzn4tdgBZjnU=
github.com/apparentlymart/go-cidr v1.1.0/go.mod h1:EBcsNrHc3zQeuaeCeCtQruQm+n9/YjEn/vI25Lg7Gwc=
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I=
Expand Down
2 changes: 1 addition & 1 deletion quesma/model/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ More info: https://www.elastic.co/guide/en/elasticsearch/reference/current/searc
Median absolute deviation | :x: | Global | :x: | Moving function | :wavy_dash: |
Min | :white_check_mark: | Histogram | :white_check_mark: | Moving percentiles | :x: |
Percentile ranks | :white_check_mark: | IP prefix | :white_check_mark: | Normalize | :x: |
Percentiles | :white_check_mark: | IP range | :x: | Percentiles bucket | :x: |
Percentiles | :white_check_mark: | IP range | :white_check_mark: | Percentiles bucket | :x: |
Rate | :x: | Missing | :x: | Serial differencing | :white_check_mark: |
Scripted metric | :x: | Multi-terms | :white_check_mark: | Stats bucket | :x: |
Stats | :white_check_mark: | Nested | :x: | Sum bucket | :white_check_mark: |
Expand Down
28 changes: 19 additions & 9 deletions quesma/model/bucket_aggregations/ip_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package bucket_aggregations
import (
"context"
"fmt"
"net/netip"
"quesma/logger"
"quesma/model"
"reflect"
Expand All @@ -14,8 +15,6 @@ import (
// So instead of "<= 255.255.255.255", it uses "< ::1:0:0:0"
const BiggestIpv4 = "::1:0:0:0"

// Current limitation: we expect Clickhouse field to be IPv4 (and not IPv6)

// Clickhouse table to test SQLs:
// CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log
// INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67')
Expand Down Expand Up @@ -95,23 +94,34 @@ func NewIpInterval(begin, end string, key *string) IpInterval {
}

// ToWhereClause builds the condition selecting rows whose field falls into the interval.
// The lower bound is expressed as `field >= begin` (inclusive) and the upper bound as
// `field < end` (exclusive). Depending on which bounds are present it returns both joined
// with AND, only one of them, or model.TrueExpr when the interval has no bounds at all.
func (interval IpInterval) ToWhereClause(field model.Expr) model.Expr {
isBegin := interval.begin != UnboundedInterval
isEnd := interval.end != UnboundedInterval && interval.end != BiggestIpv4
hasBegin := interval.hasBeginInResponse()
hasEnd := interval.hasEndInResponse()

begin := model.NewInfixExpr(field, ">=", model.NewLiteralSingleQuoteString(interval.begin))
end := model.NewInfixExpr(field, "<", model.NewLiteralSingleQuoteString(interval.end))

if isBegin && isEnd {
if hasBegin && hasEnd {
return model.NewInfixExpr(begin, "AND", end)
} else if isBegin {
} else if hasBegin {
return begin
} else if isEnd {
} else if hasEnd {
return end
} else {
return model.TrueExpr
}
}

// hasBeginInResponse reports whether the 'from' field should be added to the response.
// It is omitted when begin is the unbounded marker or the lowest IPv6 address "::".
// The IPv4 address "0.0.0.0" is a distinct netip.Addr from "::", so it still counts as a bound.
func (interval IpInterval) hasBeginInResponse() bool {
	if interval.begin == UnboundedInterval {
		return false
	}
	addr, err := netip.ParseAddr(interval.begin)
	if err != nil {
		// begin may come straight from the request; a value that does not parse
		// is certainly not "::", so treat it as a bound instead of panicking
		// the way netip.MustParseAddr would.
		return true
	}
	return addr != netip.IPv6Unspecified()
}

// hasEndInResponse reports whether the response should carry a 'to' field,
// which is the case for every interval whose end is not the unbounded marker.
func (interval IpInterval) hasEndInResponse() bool {
	unbounded := interval.end == UnboundedInterval
	return !unbounded
}

// String returns key part of the response, e.g. "1.0-2.0", or "*-6.55"
func (interval IpInterval) String() string {
if interval.key != nil {
Expand Down Expand Up @@ -166,10 +176,10 @@ func (query *IpRange) CombinatorTranslateSqlResponseToJson(subGroup CombinatorGr
}

interval := query.intervals[subGroup.idx]
if interval.begin != UnboundedInterval {
if interval.hasBeginInResponse() {
response["from"] = interval.begin
}
if interval.end != UnboundedInterval {
if interval.hasEndInResponse() {
response["to"] = interval.end
}

Expand Down
32 changes: 25 additions & 7 deletions quesma/queryparser/pancake_aggregation_parser_buckets.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ package queryparser
import (
"fmt"
"github.com/H0llyW00dzZ/cidr"
cidr2 "github.com/apparentlymart/go-cidr/cidr"
"github.com/pkg/errors"
"math"
"net"
"net/netip"
"quesma/clickhouse"
"quesma/logger"
"quesma/model"
Expand Down Expand Up @@ -410,23 +412,39 @@ func (cw *ClickhouseQueryTranslator) parseIpRange(aggregation *pancakeAggregatio
rangesRaw := params["ranges"].([]any)
ranges := make([]bucket_aggregations.IpInterval, 0, len(rangesRaw))
for _, rangeRaw := range rangesRaw {
var begin, end string
var key *string
if keyIfPresent, exists := cw.parseStringFieldExistCheck(rangeRaw.(QueryMap), "key"); exists {
key = &keyIfPresent
}
var begin, end string
if maskIfExists, exists := cw.parseStringFieldExistCheck(rangeRaw.(QueryMap), "mask"); exists {
_, ipNet, err := net.ParseCIDR(maskIfExists)
if err != nil {
return err
}
beginAsInt, endAsInt := cidr.IPv4ToRange(ipNet)
begin = util.IntToIpv4(beginAsInt)
// endAsInt is inclusive, we do +1, because we need it exclusive
if endAsInt != math.MaxUint32 {
end = util.IntToIpv4(endAsInt + 1)
if ipNet.IP.To4() != nil {
// it's ipv4
beginAsInt, endAsInt := cidr.IPv4ToRange(ipNet)
begin = util.IntToIpv4(beginAsInt)
// endAsInt is inclusive, we do +1, because we need it exclusive
if endAsInt != math.MaxUint32 {
end = util.IntToIpv4(endAsInt + 1)
} else {
end = bucket_aggregations.BiggestIpv4 // "255.255.255.255 + 1", so to say (value in compliance with Elastic)
}
} else if ipNet.IP.To16() != nil {
// it's ipv6
beginInclusive, endInclusive := cidr2.AddressRange(ipNet)
begin = beginInclusive.String()
// we do +1 (.Next()), because we need end to be exclusive
endExclusive := netip.MustParseAddr(endInclusive.String()).Next()
if endExclusive.IsValid() {
end = endExclusive.String()
} else { // invalid means endInclusive was already the biggest possible value (ff...ff)
end = bucket_aggregations.UnboundedInterval
}
} else {
end = bucket_aggregations.BiggestIpv4 // "255.255.255.255 + 1", so to say (value in compliance with Elastic)
return fmt.Errorf("invalid mask: %s", maskIfExists)
}
if key == nil {
key = &maskIfExists
Expand Down
127 changes: 119 additions & 8 deletions quesma/testdata/kibana-visualize/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -3375,9 +3375,6 @@ var AggregationTests = []testdata.AggregationTestCase{
TestName: "IP range, with ranges as CIDR masks. In Kibana: Add panel > Aggregation Based > Area. Buckets: X-asis: IP Range",
QueryRequestJson: `
{
"_source": {
"excludes": []
},
"aggs": {
"2": {
"ip_range": {
Expand Down Expand Up @@ -3451,7 +3448,7 @@ var AggregationTests = []testdata.AggregationTestCase{
}},
},
ExpectedPancakeSQL: `
SELECT countIf("clientip">='255.255.255.252') AS "range_0__aggr__2__count",
SELECT countIf(("clientip">='255.255.255.252' AND "clientip"<'::1:0:0:0')) AS "range_0__aggr__2__count",
countIf("clientip">='128.129.130.131') AS "range_1__aggr__2__count",
countIf(("clientip">='10.0.7.96' AND "clientip"<'10.0.7.128')) AS
"range_2__aggr__2__count"
Expand All @@ -3461,9 +3458,6 @@ var AggregationTests = []testdata.AggregationTestCase{
TestName: "IP range, with ranges as CIDR masks, keyed=true. In Kibana: Add panel > Aggregation Based > Area. Buckets: X-asis: IP Range",
QueryRequestJson: `
{
"_source": {
"excludes": []
},
"aggs": {
"2": {
"ip_range": {
Expand Down Expand Up @@ -3535,10 +3529,127 @@ var AggregationTests = []testdata.AggregationTestCase{
}},
},
ExpectedPancakeSQL: `
SELECT countIf("clientip">='255.255.255.254') AS "range_0__aggr__2__count",
SELECT countIf(("clientip">='255.255.255.254' AND "clientip"<'::1:0:0:0')) AS "range_0__aggr__2__count",
countIf("clientip">='128.129.130.131') AS "range_1__aggr__2__count",
countIf(("clientip">='10.0.7.96' AND "clientip"<'10.0.7.128')) AS
"range_2__aggr__2__count"
FROM __quesma_table_name`,
},
{ // [27]
TestName: "IP range ipv6",
QueryRequestJson: `
{
"aggs": {
"2": {
"ip_range": {
"field": "clientip",
"ranges": [
{
"from": "1::132:13:21:23:122:22"
},
{
"to": "1::132:13:21:23:122:22"
},
{
"to": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"
}
]
}
}
},
"size": 0,
"track_total_hits": true
}`,
ExpectedResponse: `
{
"aggregations": {
"2": {
"buckets": [
{
"key": "1::132:13:21:23:122:22-*",
"from": "1::132:13:21:23:122:22",
"doc_count": 7290
},
{
"key": "*-1::132:13:21:23:122:22",
"to": "1::132:13:21:23:122:22",
"doc_count": 6784
},
{
"key": "*-ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"to": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
"doc_count": 999999
}
]
}
}
}`,
ExpectedPancakeResults: []model.QueryResultRow{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("range_0__aggr__2__count", int64(7290)),
model.NewQueryResultCol("range_1__aggr__2__count", int64(6784)),
model.NewQueryResultCol("range_2__aggr__2__count", int64(999999)),
}},
},
ExpectedPancakeSQL: `
SELECT countIf("clientip">='1::132:13:21:23:122:22') AS
"range_0__aggr__2__count",
countIf("clientip"<'1::132:13:21:23:122:22') AS "range_1__aggr__2__count",
countIf("clientip"<'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff') AS
"range_2__aggr__2__count"
FROM __quesma_table_name`,
},
{ // [28]
TestName: "IP range ipv6 with mask",
QueryRequestJson: `
{
"aggs": {
"2": {
"ip_range": {
"field": "clientip",
"ranges": [
{
"mask": "::/2"
},
{
"mask": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127"
}
]
}
}
},
"size": 0,
"track_total_hits": true
}`,
ExpectedResponse: `
{
"aggregations": {
"2": {
"buckets": [
{
"key": "::/2",
"to": "4000::",
"doc_count": 1
},
{
"key": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127",
"from": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe",
"doc_count": 0
}
]
}
}
}`,
ExpectedPancakeResults: []model.QueryResultRow{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("range_0__aggr__2__count", int64(1)),
model.NewQueryResultCol("range_1__aggr__2__count", int64(0)),
}},
},
ExpectedPancakeSQL: `
SELECT countIf("clientip"<'4000::') AS "range_0__aggr__2__count",
countIf("clientip">='ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe') AS
"range_1__aggr__2__count"
FROM __quesma_table_name`,
},
}

0 comments on commit c54fcc2

Please sign in to comment.