Skip to content

Commit

Permalink
Fix DateTime aggregation response when 0 rows (#30)
Browse files Browse the repository at this point in the history
In Clickhouse, `min(datetime_field)` strangely returns `1st Jan 1970
00:00:00` when there are 0 rows to aggregate. That's why we also returned
that value, whereas Elastic returns `null`s in that case. I've already
played with Clickhouse a bit and I'm pretty sure that simply replacing the
`min/max/etc.` calls sent to Clickhouse with `minOrNull/maxOrNull/...` will
solve the issue.

I also checked numeric fields, and Elastic likewise returns `null`s when
there are 0 rows. So I think `...OrNull` is exactly what we need.

Elastic response for 0 rows:
<img width="1728" alt="Screenshot 2024-05-03 at 09 50 29"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/a73d5bac-f899-4281-aa9b-214086f7f30d">
Ours:
<img width="1660" alt="Screenshot 2024-05-03 at 09 50 13"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/1d3c7751-dcc0-4b7c-a9a8-7bf535f18aa0">

Before:
<img width="1728" alt="Screenshot 2024-05-07 at 20 40 59"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/e03367a6-7db9-4ddf-9067-a29309eca406">
After: `null` + nothing on the chart
<img width="1725" alt="Screenshot 2024-05-07 at 21 07 20"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/80fcec52-e641-478a-b3a9-4a9a1a087a15">
<img width="1385" alt="Screenshot 2024-05-07 at 21 07 14"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/97170df6-16c4-4a90-84d8-30e8f234b896">
Also, another `max` aggregation still works as expected:
<img width="1667" alt="Screenshot 2024-05-07 at 21 20 42"
src="https://github.com/QuesmaOrg/quesma/assets/5407146/632e3dee-4c8a-463a-b953-9c7040138edb">
  • Loading branch information
trzysiek authored May 7, 2024
1 parent 7d86155 commit f83af72
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 36 deletions.
8 changes: 6 additions & 2 deletions quesma/model/metrics_aggregations/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,17 @@ func (query Stats) TranslateSqlResponseToJson(rows []model.QueryResultRow, level
if len(rows) > 0 {
resultMap = make(model.JsonMap)
for _, v := range rows[0].Cols[level:] {
// v.ColName = e.g. avg(...). We need to extract only 'avg'.
// v.ColName = e.g. avgOrNull(...). We need to extract only 'avg'.
// first: avgOrNull(..) -> avgOrNull
firstLeftBracketIndex := strings.Index(v.ColName, "(")
if firstLeftBracketIndex == -1 {
logger.Error().Msgf("invalid column name in stats aggregation: %s. Skipping", v.ColName)
continue
}
resultMap[v.ColName[:firstLeftBracketIndex]] = v.Value
fullName := v.ColName[:firstLeftBracketIndex]
// second: if ends with OrNull, then avgOrNull -> avg
withoutOrNull, _ := strings.CutSuffix(fullName, "OrNull")
resultMap[withoutOrNull] = v.Value
}
}
return []model.JsonMap{resultMap}
Expand Down
10 changes: 5 additions & 5 deletions quesma/queryparser/aggregation_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (b *aggrQueryBuilder) buildMetricsAggregation(metricsAggr metricsAggregatio
query := b.buildAggregationCommon(metadata)
switch metricsAggr.AggrType {
case "sum", "min", "max", "avg":
query.NonSchemaFields = append(query.NonSchemaFields, metricsAggr.AggrType+`("`+getFirstFieldName()+`")`)
query.NonSchemaFields = append(query.NonSchemaFields, metricsAggr.AggrType+`OrNull("`+getFirstFieldName()+`")`)
case "quantile":
for usersPercent, percentAsFloat := range metricsAggr.Percentiles {
query.NonSchemaFields = append(query.NonSchemaFields, fmt.Sprintf(
Expand All @@ -100,10 +100,10 @@ func (b *aggrQueryBuilder) buildMetricsAggregation(metricsAggr metricsAggregatio
query.NonSchemaFields = append(
query.NonSchemaFields,
"count(`"+fieldName+"`)",
"min(`"+fieldName+"`)",
"max(`"+fieldName+"`)",
"avg(`"+fieldName+"`)",
"sum(`"+fieldName+"`)",
"minOrNull(`"+fieldName+"`)",
"maxOrNull(`"+fieldName+"`)",
"avgOrNull(`"+fieldName+"`)",
"sumOrNull(`"+fieldName+"`)",
)
case "top_hits":
query.Fields = append(query.Fields, metricsAggr.FieldNames...)
Expand Down
8 changes: 4 additions & 4 deletions quesma/queryparser/aggregation_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ var aggregationTests = []struct {
}`,
[]string{
`SELECT count() FROM ` + tableNameQuoted + ` `,
`SELECT max("AvgTicketPrice") FROM ` + tableNameQuoted + ` `,
`SELECT min("AvgTicketPrice") FROM ` + tableNameQuoted + ` `,
`SELECT maxOrNull("AvgTicketPrice") FROM ` + tableNameQuoted + ` `,
`SELECT minOrNull("AvgTicketPrice") FROM ` + tableNameQuoted + ` `,
},
},
{ // [1]
Expand Down Expand Up @@ -340,7 +340,7 @@ var aggregationTests = []struct {
}`,
[]string{
`SELECT count() FROM ` + tableNameQuoted + ` `,
`SELECT sum("taxful_total_price") FROM ` + tableNameQuoted + " ",
`SELECT sumOrNull("taxful_total_price") FROM ` + tableNameQuoted + " ",
},
},
{ // [9]
Expand Down Expand Up @@ -375,7 +375,7 @@ var aggregationTests = []struct {
}`,
[]string{
`SELECT count() FROM ` + tableNameQuoted + ` `,
`SELECT avg("total_quantity") FROM ` + tableNameQuoted + " ",
`SELECT avgOrNull("total_quantity") FROM ` + tableNameQuoted + " ",
},
},
{ // [11]
Expand Down
46 changes: 28 additions & 18 deletions quesma/testdata/aggregation_requests.go

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions quesma/testdata/dashboard-1/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,12 @@ var AggregationTests = []testdata.AggregationTestCase{
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("floor(rspContentLen / 2000000.000000) * 2000000.000000", 0),
model.NewQueryResultCol("floor(rspContentLen / 2000000.000000) * 2000000.000000", 0),
model.NewQueryResultCol("avg(rspContentLen)", 42516.52153947081),
model.NewQueryResultCol("avgOrNull(rspContentLen)", 42516.52153947081),
}},
{Cols: []model.QueryResultCol{
model.NewQueryResultCol("floor(rspContentLen / 2000000.000000) * 2000000.000000", 658000000),
model.NewQueryResultCol("floor(rspContentLen / 2000000.000000) * 2000000.000000", 658000000),
model.NewQueryResultCol("avg(rspContentLen)", 658654099),
model.NewQueryResultCol("avgOrNull(rspContentLen)", 658654099),
}},
},
{
Expand Down Expand Up @@ -176,7 +176,7 @@ var AggregationTests = []testdata.AggregationTestCase{
},
ExpectedSQLs: []string{
`SELECT count() FROM ` + testdata.QuotedTableName + ` WHERE "reqTimeSec">='2024-04-24T10:55:23.606Z' AND "reqTimeSec"<='2024-04-24T11:10:23.606Z' `,
`SELECT floor("rspContentLen" / 2000000.000000) * 2000000.000000, floor("rspContentLen" / 2000000.000000) * 2000000.000000, avg("rspContentLen") ` +
`SELECT floor("rspContentLen" / 2000000.000000) * 2000000.000000, floor("rspContentLen" / 2000000.000000) * 2000000.000000, avgOrNull("rspContentLen") ` +
`FROM ` + testdata.QuotedTableName + ` WHERE "reqTimeSec">='2024-04-24T10:55:23.606Z' AND "reqTimeSec"<='2024-04-24T11:10:23.606Z' ` +
`GROUP BY (floor("rspContentLen" / 2000000.000000) * 2000000.000000, floor("rspContentLen" / 2000000.000000) * 2000000.000000) ` +
`ORDER BY (floor("rspContentLen" / 2000000.000000) * 2000000.000000, floor("rspContentLen" / 2000000.000000) * 2000000.000000)`,
Expand Down
4 changes: 2 additions & 2 deletions quesma/testdata/opensearch-visualize/aggregation_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,10 @@ var AggregationTests = []testdata.AggregationTestCase{
// TODO after merge of some PR, change logs-generic-default to testdata.QuotedTableName
`SELECT count() FROM "logs-generic-default" ` +
`WHERE "epoch_time">='2024-04-28T14:34:22.674Z' AND "epoch_time"<='2024-04-28T14:49:22.674Z' `,
`SELECT sum("properties.entry_time") FROM "logs-generic-default" ` +
`SELECT sumOrNull("properties.entry_time") FROM "logs-generic-default" ` +
`WHERE ("epoch_time">='2024-04-28T14:34:22.674Z' AND "epoch_time"<='2024-04-28T14:49:22.674Z') ` +
`AND "epoch_time_original">=0 AND "epoch_time_original"<1000 `,
`SELECT sum("properties.entry_time") FROM "logs-generic-default" ` +
`SELECT sumOrNull("properties.entry_time") FROM "logs-generic-default" ` +
`WHERE ("epoch_time">='2024-04-28T14:34:22.674Z' AND "epoch_time"<='2024-04-28T14:49:22.674Z') ` +
`AND "epoch_time_original">=1000 `,
`SELECT count(if("epoch_time_original">=0 AND "epoch_time_original"<1000, 1, NULL)), ` +
Expand Down
4 changes: 2 additions & 2 deletions quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -789,8 +789,8 @@ var TestsAsyncSearch = []AsyncSearchTestCase{
model.SearchQueryInfo{Typ: model.Normal},
[]string{
`SELECT count() FROM "logs-generic-default" WHERE "message" iLIKE '%posei%' AND "message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%' `,
`SELECT m..("@timestamp") FROM "logs-generic-default" WHERE "message" iLIKE '%posei%' AND "message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%' `,
`SELECT m..("@timestamp") FROM "logs-generic-default" WHERE "message" iLIKE '%posei%' AND "message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%' `,
`SELECT m..OrNull("@timestamp") FROM "logs-generic-default" WHERE "message" iLIKE '%posei%' AND "message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%' `,
`SELECT m..OrNull("@timestamp") FROM "logs-generic-default" WHERE "message" iLIKE '%posei%' AND "message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%' `,
},
true,
},
Expand Down

0 comments on commit f83af72

Please sign in to comment.