diff --git a/quesma/quesma/schema_array_transformer.go b/quesma/quesma/schema_array_transformer.go
index 0ccf39c92..3adfba283 100644
--- a/quesma/quesma/schema_array_transformer.go
+++ b/quesma/quesma/schema_array_transformer.go
@@ -16,6 +16,130 @@ import (
 //
 //
 
+// Aggregate function names (keys MUST be lowercase: lookups go through strings.ToLower), generated from ClickHouse documentation:
+// git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git
+// cd ClickHouse/docs/en/sql-reference/aggregate-functions/reference
+// find . -type f | cut -c3- | rev | cut -c4- | rev | tr 'A-Z' 'a-z' | sort
+
+var aggregateFunctions = map[string]bool{
+	"aggthrow":                     true,
+	"analysis_of_variance":         true,
+	"any":                          true,
+	"anyheavy":                     true,
+	"anylast":                      true,
+	"approxtopk":                   true,
+	"approxtopsum":                 true,
+	"argmax":                       true,
+	"argmin":                       true,
+	"arrayconcatagg":               true,
+	"avg":                          true,
+	"avgweighted":                  true,
+	"boundrat":                     true,
+	"categoricalinformationvalue":  true,
+	"contingency":                  true,
+	"corr":                         true,
+	"corrmatrix":                   true,
+	"corrstable":                   true,
+	"count":                        true,
+	"covarpop":                     true,
+	"covarpopmatrix":               true,
+	"covarpopstable":               true,
+	"covarsamp":                    true,
+	"covarsampmatrix":              true,
+	"covarsampstable":              true,
+	"cramersv":                     true,
+	"cramersvbiascorrected":        true,
+	"deltasum":                     true,
+	"deltasumtimestamp":            true,
+	"entropy":                      true,
+	"exponentialmovingaverage":     true,
+	"exponentialtimedecayedavg":    true,
+	"exponentialtimedecayedcount":  true,
+	"exponentialtimedecayedmax":    true,
+	"exponentialtimedecayedsum":    true,
+	"first_value":                  true,
+	"flame_graph":                  true,
+	"grouparray":                   true,
+	"grouparrayinsertat":           true,
+	"grouparrayintersect":          true,
+	"grouparraylast":               true,
+	"grouparraymovingavg":          true,
+	"grouparraymovingsum":          true,
+	"grouparraysample":             true,
+	"grouparraysorted":             true,
+	"groupbitand":                  true,
+	"groupbitmap":                  true,
+	"groupbitmapand":               true,
+	"groupbitmapor":                true,
+	"groupbitmapxor":               true,
+	"groupbitor":                   true,
+	"groupbitxor":                  true,
+	"groupconcat":                  true,
+	"groupuniqarray":               true,
+	"index":                        true,
+	"intervallengthsum":            true,
+	"kolmogorovsmirnovtest":        true,
+	"kurtpop":                      true,
+	"kurtsamp":                     true,
+	"largesttrianglethreebuckets":  true,
+	"last_value":                   true,
+	"mannwhitneyutest":             true,
+	"max":                          true,
+	"maxintersections":             true,
+	"maxintersectionsposition":     true,
+	"maxmap":                       true,
+	"meanztest":                    true,
+	"median":                       true,
+	"min":                          true,
+	"minmap":                       true,
+	"quantile":                     true,
+	"quantilebfloat16":             true,
+	"quantileddsketch":             true,
+	"quantiledeterministic":        true,
+	"quantileexact":                true,
+	"quantileexactweighted":        true,
+	"quantilegk":                   true,
+	"quantileinterpolatedweighted": true,
+	"quantiles":                    true,
+	"quantiletdigest":              true,
+	"quantiletdigestweighted":      true,
+	"quantiletiming":               true,
+	"quantiletimingweighted":       true,
+	"rankcorr":                     true,
+	"simplelinearregression":       true,
+	"singlevalueornull":            true,
+	"skewpop":                      true,
+	"skewsamp":                     true,
+	"sparkbar":                     true,
+	"stddevpop":                    true,
+	"stddevpopstable":              true,
+	"stddevsamp":                   true,
+	"stddevsampstable":             true,
+	"stochasticlinearregression":   true,
+	"stochasticlogisticregression": true,
+	"studentttest":                 true,
+	"sum":                          true,
+	"sumcount":                     true,
+	"sumkahan":                     true,
+	"summap":                       true,
+	"summapwithoverflow":           true,
+	"sumwithoverflow":              true,
+	"theilsu":                      true,
+	"topk":                         true,
+	"topkweighted":                 true,
+	"uniq":                         true,
+	"uniqcombined":                 true,
+	"uniqcombined64":               true,
+	"uniqexact":                    true,
+	"uniqhll12":                    true,
+	"uniqthetasketch":              true,
+	"varpop":                       true,
+	"varpopstable":                 true,
+	"varsamp":                      true,
+	"varsampstable":                true,
+	"welchttest":                   true,
+}
+
 type arrayTypeResolver struct {
 	indexSchema schema.Schema
 }
@@ -81,13 +205,28 @@ func NewArrayTypeVisitor(resolver arrayTypeResolver) model.ExprVisitor {
 			if ok {
 				dbType := resolver.dbColumnType(column.ColumnName)
 				if strings.HasPrefix(dbType, "Array") {
-					if strings.HasPrefix(e.Name, "sum") {
-						// here we apply -Array combinator to the sum function
+					funcName := e.Name
+
+					ifSuffix := strings.HasSuffix(funcName, "If")
+					if ifSuffix {
+						funcName = strings.TrimSuffix(funcName, "If")
+					}
+					orNullSuffix := strings.HasSuffix(funcName, "OrNull")
+					if orNullSuffix {
+						funcName = strings.TrimSuffix(funcName, "OrNull")
+					}
+
+					if aggregateFunctions[strings.ToLower(funcName)] {
+						// Use a variant of the function with "Array" suffix:
 						// https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators#-array
-						//
-						// TODO this can be rewritten to transform all aggregate functions as well
-						//
-						e.Name = strings.ReplaceAll(e.Name, "sum", "sumArray")
+						newName := funcName + "Array"
+						if orNullSuffix {
+							newName = newName + "OrNull"
+						}
+						if ifSuffix {
+							newName = newName + "If"
+						}
+						e.Name = newName
 					} else {
 						logger.Error().Msgf("Unhandled array function %s, column %v (%v)", e.Name, column.ColumnName, dbType)
 					}
@@ -98,6 +237,15 @@ func NewArrayTypeVisitor(resolver arrayTypeResolver) model.ExprVisitor {
 		args := b.VisitChildren(e.Args)
 		return model.NewFunction(e.Name, args...)
 	}
+
+	visitor.OverrideVisitColumnRef = func(b *model.BaseExprVisitor, e model.ColumnRef) interface{} {
+		dbType := resolver.dbColumnType(e.ColumnName)
+		if strings.HasPrefix(dbType, "Array") {
+			logger.Error().Msgf("Unhandled array column ref %v (%v)", e.ColumnName, dbType)
+		}
+		return e
+	}
+
 	return visitor
 }
 
@@ -148,23 +296,6 @@ func checkIfGroupingByArrayColumn(selectCommand model.SelectCommand, resolver ar
 		return &e
 	}
 
-	visitor.OverrideVisitFunction = func(b *model.BaseExprVisitor, e model.FunctionExpr) interface{} {
-
-		if strings.HasPrefix(e.Name, "sum") || strings.HasPrefix(e.Name, "count") {
-
-			if len(e.Args) > 0 {
-				arg := e.Args[0]
-
-				if isArrayColumn(arg) {
-					found = true
-				}
-
-			}
-
-		}
-		return e
-	}
-
 	selectCommand.Accept(visitor)
 
 	return found
diff --git a/quesma/quesma/schema_transformer_test.go b/quesma/quesma/schema_transformer_test.go
index d5430fb66..cb9b91399 100644
--- a/quesma/quesma/schema_transformer_test.go
+++ b/quesma/quesma/schema_transformer_test.go
@@ -487,7 +487,7 @@ func Test_arrayType(t *testing.T) {
 					FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
 					Columns: []model.Expr{
 						model.NewColumnRef("order_date"),
-						model.NewAliasedExpr(model.NewFunction("sumOrNull", model.NewFunction("arrayJoin", model.NewColumnRef("products_quantity"))), "column_1"),
+						model.NewAliasedExpr(model.NewFunction("sumArrayOrNull", model.NewColumnRef("products_quantity")), "column_1"),
 					},
 					GroupBy: []model.Expr{model.NewColumnRef("order_date")},
 				},