Skip to content

Commit

Permalink
Respect keyed parameter in percentile_ranks aggr (#57)
Browse files Browse the repository at this point in the history
Before: dashboard is broken. We don't respect `keyed`, and return
response in incorrect format. I've already fixed it for 2 other
aggregations.

Not sure why, but I didn't add a test for this aggregation, so we had
none. Now we have 1 😆
<img width="478" alt="Screenshot 2024-05-08 at 00 15 39"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/5a45a330-cda2-432f-b249-be11a0467665">
<img width="1728" alt="Screenshot 2024-05-08 at 00 15 14"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/54a61242-8ef0-4435-9182-f47e9247b900">
After:
<img width="1725" alt="Screenshot 2024-05-08 at 18 52 39"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/9bd7eea9-a9cb-406f-a445-71974dbc087b">
  • Loading branch information
trzysiek authored May 8, 2024
1 parent 92aebdf commit 62e6b30
Show file tree
Hide file tree
Showing 3 changed files with 236 additions and 27 deletions.
97 changes: 71 additions & 26 deletions quesma/model/metrics_aggregations/percentile_ranks.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,94 @@ import (
"context"
"mitmproxy/quesma/logger"
"mitmproxy/quesma/model"
"strconv"
"strings"
)

type PercentileRanks struct {
ctx context.Context
// defines what response should look like
// https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html#_keyed_response_5
Keyed bool
}

func NewPercentileRanks(ctx context.Context) PercentileRanks {
return PercentileRanks{ctx: ctx}
func NewPercentileRanks(ctx context.Context, keyed bool) PercentileRanks {
return PercentileRanks{ctx: ctx, Keyed: keyed}
}

func (query PercentileRanks) IsBucketAggregation() bool {
return false
}

func (query PercentileRanks) TranslateSqlResponseToJson(rows []model.QueryResultRow, level int) []model.JsonMap {
valueMap := make(map[string]float64)
for _, percentileRank := range rows[0].Cols[level:] {
// percentileRank.ColName looks like this [...]<=X,[...]. We're extracting X.
// It always needs to have .Y or .YZ at the end, so 1 or 2 digits after the dot, and dot is mandatory.
// Also, can't be .00, needs to be .0
beg := strings.Index(percentileRank.ColName, "<=")
end := strings.Index(percentileRank.ColName[beg:], ",")
cutValue := percentileRank.ColName[beg+2 : beg+end]

dot := strings.Index(cutValue, ".")
if dot == -1 {
cutValue += ".0"
} else if end-dot >= len(".00") && cutValue[dot:dot+3] == ".00" {
cutValue = cutValue[:dot+2]
} else {
cutValue = cutValue[:dot+3]
if len(rows) == 0 {
logger.WarnWithCtx(query.ctx).Msg("no rows in percentile ranks response")
return make([]model.JsonMap, 0)
}
// I duplicate a lot of code in this if/else below,
// but I think it's worth it, as this function might get called a lot of times for a single query.
// And because of complete separation in if/else, I guess it might (should) be slightly faster (?)
if query.Keyed {
valueMap := make(model.JsonMap)
for _, percentileRank := range rows[0].Cols[level:] {
// percentileRank.ColName looks like this [...]<=X,[...]. We're extracting X.
// It always needs to have .Y or .YZ at the end, so 1 or 2 digits after the dot, and dot is mandatory.
// Also, can't be .00, needs to be .0
beg := strings.Index(percentileRank.ColName, "<=")
end := strings.Index(percentileRank.ColName[beg:], ",")
cutValue := percentileRank.ColName[beg+2 : beg+end]

dot := strings.Index(cutValue, ".")
if dot == -1 {
cutValue += ".0"
} else if end-dot >= len(".00") && cutValue[dot:dot+3] == ".00" {
cutValue = cutValue[:dot+2]
} else {
cutValue = cutValue[:dot+3]
}
if value, ok := percentileRank.Value.(float64); ok {
valueMap[cutValue] = value
} else {
logger.WarnWithCtx(query.ctx).Msgf("failed to convert percentile rank value to float64, type: %T, value: %v. Skipping",
percentileRank.Value, percentileRank.Value)
}
}
if value, ok := percentileRank.Value.(float64); ok {
valueMap[cutValue] = value
} else {
logger.WarnWithCtx(query.ctx).Msgf("failed to convert percentile rank value to float64, type: %T, value: %v",
percentileRank.Value, percentileRank.Value)
return []model.JsonMap{{
"values": valueMap,
}}
} else {
buckets := make([]model.JsonMap, 0)
for _, percentileRank := range rows[0].Cols[level:] {
// percentileRank.ColName looks like this [...]<=X,[...]. We're extracting X.
// It always needs to have .Y or .YZ at the end, so 1 or 2 digits after the dot, and dot is mandatory.
// Also, can't be .00, needs to be .0
beg := strings.Index(percentileRank.ColName, "<=")
end := strings.Index(percentileRank.ColName[beg:], ",")
cutValue := percentileRank.ColName[beg+2 : beg+end]

dot := strings.Index(cutValue, ".")
if dot == -1 {
cutValue += ".0"
} else if end-dot >= len(".00") && cutValue[dot:dot+3] == ".00" {
cutValue = cutValue[:dot+2]
} else {
cutValue = cutValue[:dot+3]
}
cutValueFloat, _ := strconv.ParseFloat(cutValue, 64)
if value, ok := percentileRank.Value.(float64); ok {
buckets = append(buckets, model.JsonMap{
"key": cutValueFloat,
"value": value,
})
} else {
logger.WarnWithCtx(query.ctx).Msgf("failed to convert percentile rank value to float64, type: %T, value: %v. Skipping",
percentileRank.Value, percentileRank.Value)
}
}
return []model.JsonMap{{
"values": buckets,
}}
}
return []model.JsonMap{{
"values": valueMap,
}}
}

func (query PercentileRanks) String() string {
Expand Down
14 changes: 13 additions & 1 deletion quesma/queryparser/aggregation_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"strings"
)

const keyedDefaultValuePercentileRanks = true

type filter struct {
name string
sql SimpleQuery
Expand Down Expand Up @@ -184,7 +186,7 @@ func (b *aggrQueryBuilder) buildMetricsAggregation(metricsAggr metricsAggregatio
case "value_count":
query.Type = metrics_aggregations.NewValueCount(b.ctx)
case "percentile_ranks":
query.Type = metrics_aggregations.NewPercentileRanks(b.ctx)
query.Type = metrics_aggregations.NewPercentileRanks(b.ctx, metricsAggr.Keyed)
}
return query
}
Expand Down Expand Up @@ -512,10 +514,20 @@ func (cw *ClickhouseQueryTranslator) tryMetricsAggregation(queryMap QueryMap) (m
logger.WarnWithCtx(cw.Ctx).Msgf("cutValue in percentile_ranks is not a number, but %T, value: %v. Skipping.", cutValue, cutValue)
}
}
var keyed bool
if keyedRaw, ok := percentileRanks.(QueryMap)["keyed"]; ok {
if keyed, ok = keyedRaw.(bool); !ok {
logger.WarnWithCtx(cw.Ctx).Msgf("keyed specified for percentiles aggregation is not a boolean. Querymap: %v", queryMap)
keyed = keyedDefaultValuePercentileRanks
}
} else {
keyed = keyedDefaultValuePercentileRanks
}
return metricsAggregation{
AggrType: "percentile_ranks",
FieldNames: fieldNames,
FieldType: metricsAggregationDefaultFieldType, // don't need to check, it's unimportant for this aggregation
Keyed: keyed,
}, true
}

Expand Down
152 changes: 152 additions & 0 deletions quesma/testdata/opensearch-visualize/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1124,4 +1124,156 @@ var AggregationTests = []testdata.AggregationTestCase{
`ORDER BY ("response")`,
},
},
{ // [7]
TestName: "Percentile_ranks keyed=false. Reproduce: Visualize -> Line -> Metrics: Percentile Ranks, Buckets: X-Asis Date Histogram",
QueryRequestJson: `
{
"_source": {
"excludes": []
},
"aggs": {
"2": {
"aggs": {
"1": {
"percentile_ranks": {
"field": "AvgTicketPrice",
"keyed": false,
"values": [
0,
50000
]
}
}
},
"date_histogram": {
"calendar_interval": "1h",
"field": "timestamp",
"min_doc_count": 1,
"time_zone": "Europe/Warsaw"
}
}
},
"docvalue_fields": [
{
"field": "timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [],
"must": [
{
"match_all": {}
}
],
"must_not": [],
"should": []
}
},
"script_fields": {
"hour_of_day": {
"script": {
"lang": "painless",
"source": "doc['timestamp'].value.hourOfDay"
}
}
},
"size": 0,
"stored_fields": [
"*"
]
}`,
ExpectedResponse: `
{
"_shards": {
"failed": 0,
"skipped": 0,
"successful": 1,
"total": 1
},
"aggregations": {
"2": {
"buckets": [
{
"1": {
"values": [
{
"key": 0.0,
"value": 0.0
},
{
"key": 50000.0,
"value": 100.0
}
]
},
"doc_count": 9,
"key": 1714860000000,
"key_as_string": "2024-05-04T22:00:00.000"
},
{
"1": {
"values": [
{
"key": 0.0,
"value": 0.0
},
{
"key": 50000.0,
"value": 50.0
}
]
},
"doc_count": 12,
"key": 1714863600000,
"key_as_string": "2024-05-04T23:00:00.000"
}
]
}
},
"hits": {
"hits": [],
"max_score": null,
"total": {
"relation": "eq",
"value": 884
}
},
"timed_out": false,
"took": 0
}`,
ExpectedResults: [][]model.QueryResultRow{
{{Cols: []model.QueryResultCol{model.NewQueryResultCol("hits", uint64(884))}}},
{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("key", int64(1714860000000/3600000)),
model.NewQueryResultCol("AvgTicketPrice<=0,", 0.0),
model.NewQueryResultCol("AvgTicketPrice<=50000,", 100.0)},
},
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("key", int64(1714863600000/3600000)),
model.NewQueryResultCol("AvgTicketPrice<=0,", 0.0),
model.NewQueryResultCol("AvgTicketPrice<=50000,", 50.0),
}},
},
{
{Cols: []model.QueryResultCol{model.NewQueryResultCol("key", int64(1714860000000/3600000)), model.NewQueryResultCol("doc_count", 9)}},
{Cols: []model.QueryResultCol{model.NewQueryResultCol("key", int64(1714863600000/3600000)), model.NewQueryResultCol("doc_count", 12)}},
},
},
ExpectedSQLs: []string{
`SELECT count() FROM ` + testdata.QuotedTableName + ` `,
"SELECT toInt64(toUnixTimestamp64Milli(`timestamp`)/3600000), " +
`count(if("AvgTicketPrice"<=0.000000, 1, NULL))/count(*)*100, ` +
`count(if("AvgTicketPrice"<=50000.000000, 1, NULL))/count(*)*100 ` +
`FROM ` + testdata.QuotedTableName + ` ` +
"GROUP BY (toInt64(toUnixTimestamp64Milli(`timestamp`)/3600000)) " +
"ORDER BY (toInt64(toUnixTimestamp64Milli(`timestamp`)/3600000))",
"SELECT toInt64(toUnixTimestamp64Milli(`timestamp`)/3600000), count() " +
`FROM ` + testdata.QuotedTableName + ` ` +
"GROUP BY (toInt64(toUnixTimestamp64Milli(`timestamp`)/3600000)) " +
"ORDER BY (toInt64(toUnixTimestamp64Milli(`timestamp`)/3600000))",
},
},
}

0 comments on commit 62e6b30

Please sign in to comment.