From 62e79e7694da2242d637e09a45b3611b558d6bf4 Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Thu, 26 Sep 2024 10:15:33 -0700 Subject: [PATCH 1/4] sync id field --- .../protos/semantic_model.proto | 14 +-- .../protos/semantic_model_pb2.py | 88 +++++++++++-------- .../protos/semantic_model_pb2.pyi | 2 + 3 files changed, 59 insertions(+), 45 deletions(-) diff --git a/semantic_model_generator/protos/semantic_model.proto b/semantic_model_generator/protos/semantic_model.proto index f75b62bc..1a0095ce 100644 --- a/semantic_model_generator/protos/semantic_model.proto +++ b/semantic_model_generator/protos/semantic_model.proto @@ -15,6 +15,7 @@ package semantic_model_generator; extend google.protobuf.FieldOptions { optional bool optional = 51234; optional bool sql_expression = 51235; + optional bool id_field = 51236; } // AggregationType defines a list of various aggregations. @@ -52,7 +53,7 @@ message RetrievalResult { // e.g. `base_column1 + base_column2`. message Column { // A descriptive name for this column. - string name = 1; + string name = 1 [(id_field) = true]; // A list of other terms/phrases used to refer to this column. repeated string synonyms = 2 [(optional) = true]; // A brief description about this column, including things like what data this @@ -88,7 +89,7 @@ message Column { // context_to_column_format() of snowpilot/semantic_context/protos/schema.py. message Dimension { // A descriptive name for this dimension. - string name = 1; + string name = 1 [(id_field) = true]; // A list of other terms/phrases used to refer to this dimension. repeated string synonyms = 2 [(optional) = true]; // A brief description about this dimension, including things like @@ -113,7 +114,7 @@ message Dimension { // to_column_format() of snowpilot/semantic_context/utils/utils.py. message TimeDimension { // A descriptive name for this time dimension. - string name = 1; + string name = 1 [(id_field) = true]; // A list of other terms/phrases used to refer to this time dimension. repeated string synonyms = 2 [(optional) = true]; // A brief description about this time dimension, including things like @@ -136,7 +137,7 @@ message TimeDimension { // to_column_format() of snowpilot/semantic_context/utils/utils.py. message Measure { // A descriptive name for this measure. - string name = 1; + string name = 1 [(id_field) = true]; // A list of other terms/phrases used to refer to this measure. repeated string synonyms = 2 [(optional) = true]; // A brief description about this measure, including things like what data @@ -188,7 +189,7 @@ message PrimaryKey { // table and/or introduce new derived columns. message Table { // A descriptive name for this table. - string name = 1; + string name = 1 [(id_field) = true]; // A list of other term/phrases used to refer to this table. repeated string synonyms = 2 [(optional) = true]; // A brief description of this table, including details of what kinds of @@ -221,7 +222,7 @@ message Table { // tables. message Metric { // A descriptive name of the metric. - string name = 1; + string name = 1 [(id_field) = true]; // A list of other term/phrases used to refer to this metric. repeated string synonyms = 2 [(optional) = true]; // A brief description of this metric, including details of what it computes. @@ -266,7 +267,6 @@ message Relationship { string right_table = 3; // Keys directly represent the join relationship. repeated RelationKey relationship_columns = 7; - // Type of the join. JoinType join_type = 5; // Type of the relationship. RelationshipType relationship_type = 6; diff --git a/semantic_model_generator/protos/semantic_model_pb2.py b/semantic_model_generator/protos/semantic_model_pb2.py index 298f9832..50828e1f 100644 --- a/semantic_model_generator/protos/semantic_model_pb2.py +++ b/semantic_model_generator/protos/semantic_model_pb2.py @@ -15,13 +15,15 @@ from google.protobuf import descriptor_pb2 as google_dot_protobuf_dot_descriptor__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14semantic_model.proto\x12\x18semantic_model_generator\x1a google/protobuf/descriptor.proto\"/\n\x0fRetrievalResult\x12\r\n\x05value\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\"\xc5\x03\n\x06\x43olumn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12\x32\n\x04kind\x18\x06 \x01(\x0e\x32$.semantic_model_generator.ColumnKind\x12\x14\n\x06unique\x18\x07 \x01(\x08\x42\x04\x90\x82\x19\x01\x12L\n\x13\x64\x65\x66\x61ult_aggregation\x18\x08 \x01(\x0e\x32).semantic_model_generator.AggregationTypeB\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\t \x03(\tB\x04\x90\x82\x19\x01\x12\'\n\x19index_and_retrieve_values\x18\n \x01(\x08\x42\x04\x90\x82\x19\x01\x12K\n\x12retrieved_literals\x18\x0b \x03(\x0b\x32).semantic_model_generator.RetrievalResultB\x04\x90\x82\x19\x01\x12(\n\x1a\x63ortex_search_service_name\x18\x0c \x01(\tB\x04\x90\x82\x19\x01\"\xd0\x01\n\tDimension\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12\x14\n\x06unique\x18\x06 \x01(\x08\x42\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\x07 \x03(\tB\x04\x90\x82\x19\x01\x12(\n\x1a\x63ortex_search_service_name\x18\t \x01(\tB\x04\x90\x82\x19\x01\"\xaa\x01\n\rTimeDimension\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12\x14\n\x06unique\x18\x06 \x01(\x08\x42\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\x07 \x03(\tB\x04\x90\x82\x19\x01\"\xdc\x01\n\x07Measure\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12L\n\x13\x64\x65\x66\x61ult_aggregation\x18\x06 \x01(\x0e\x32).semantic_model_generator.AggregationTypeB\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\x07 \x03(\tB\x04\x90\x82\x19\x01\"b\n\x0bNamedFilter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\"F\n\x13\x46ullyQualifiedTable\x12\x10\n\x08\x64\x61tabase\x18\x01 \x01(\t\x12\x0e\n\x06schema\x18\x02 \x01(\t\x12\r\n\x05table\x18\x03 \x01(\t\"\x1d\n\nPrimaryKey\x12\x0f\n\x07\x63olumns\x18\x01 \x03(\t\"\x85\x04\n\x05Table\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x41\n\nbase_table\x18\x04 \x01(\x0b\x32-.semantic_model_generator.FullyQualifiedTable\x12\x37\n\x07\x63olumns\x18\x05 \x03(\x0b\x32 .semantic_model_generator.ColumnB\x04\x90\x82\x19\x01\x12=\n\ndimensions\x18\t \x03(\x0b\x32#.semantic_model_generator.DimensionB\x04\x90\x82\x19\x01\x12\x46\n\x0ftime_dimensions\x18\n \x03(\x0b\x32\'.semantic_model_generator.TimeDimensionB\x04\x90\x82\x19\x01\x12\x39\n\x08measures\x18\x0b \x03(\x0b\x32!.semantic_model_generator.MeasureB\x04\x90\x82\x19\x01\x12?\n\x0bprimary_key\x18\x06 \x01(\x0b\x32$.semantic_model_generator.PrimaryKeyB\x04\x90\x82\x19\x01\x12<\n\x07\x66ilters\x18\x08 \x03(\x0b\x32%.semantic_model_generator.NamedFilterB\x04\x90\x82\x19\x01\"\x9c\x01\n\x06Metric\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12=\n\x06\x66ilter\x18\x05 \x01(\x0b\x32\'.semantic_model_generator.MetricsFilterB\x04\x90\x82\x19\x01\"#\n\rMetricsFilter\x12\x12\n\x04\x65xpr\x18\x01 \x01(\tB\x04\x98\x82\x19\x01\"8\n\x0bRelationKey\x12\x13\n\x0bleft_column\x18\x01 \x01(\t\x12\x14\n\x0cright_column\x18\x02 \x01(\t\"\x88\x02\n\x0cRelationship\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nleft_table\x18\x02 \x01(\t\x12\x13\n\x0bright_table\x18\x03 \x01(\t\x12\x43\n\x14relationship_columns\x18\x07 \x03(\x0b\x32%.semantic_model_generator.RelationKey\x12\x35\n\tjoin_type\x18\x05 \x01(\x0e\x32\".semantic_model_generator.JoinType\x12\x45\n\x11relationship_type\x18\x06 \x01(\x0e\x32*.semantic_model_generator.RelationshipType\"\xb0\x02\n\rSemanticModel\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x19\n\x0b\x64\x65scription\x18\x02 \x01(\tB\x04\x90\x82\x19\x01\x12/\n\x06tables\x18\x03 \x03(\x0b\x32\x1f.semantic_model_generator.Table\x12\x37\n\x07metrics\x18\x04 \x03(\x0b\x32 .semantic_model_generator.MetricB\x04\x90\x82\x19\x01\x12\x43\n\rrelationships\x18\x05 \x03(\x0b\x32&.semantic_model_generator.RelationshipB\x04\x90\x82\x19\x01\x12G\n\x10verified_queries\x18\x06 \x03(\x0b\x32\'.semantic_model_generator.VerifiedQueryB\x04\x90\x82\x19\x01\"\x9b\x01\n\rVerifiedQuery\x12\x0c\n\x04name\x18\x01 \x01(\t\x12!\n\x13semantic_model_name\x18\x02 \x01(\tB\x04\x90\x82\x19\x01\x12\x10\n\x08question\x18\x03 \x01(\t\x12\x11\n\x03sql\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x19\n\x0bverified_at\x18\x05 \x01(\x03\x42\x04\x90\x82\x19\x01\x12\x19\n\x0bverified_by\x18\x06 \x01(\tB\x04\x90\x82\x19\x01\"\\\n\x17VerifiedQueryRepository\x12\x41\n\x10verified_queries\x18\x01 \x03(\x0b\x32\'.semantic_model_generator.VerifiedQuery*~\n\x0f\x41ggregationType\x12\x1c\n\x18\x61ggregation_type_unknown\x10\x00\x12\x07\n\x03sum\x10\x01\x12\x07\n\x03\x61vg\x10\x02\x12\n\n\x06median\x10\x07\x12\x07\n\x03min\x10\x03\x12\x07\n\x03max\x10\x04\x12\t\n\x05\x63ount\x10\x05\x12\x12\n\x0e\x63ount_distinct\x10\x06*U\n\nColumnKind\x12\x17\n\x13\x63olumn_kind_unknown\x10\x00\x12\r\n\tdimension\x10\x01\x12\x0b\n\x07measure\x10\x02\x12\x12\n\x0etime_dimension\x10\x03*<\n\x08JoinType\x12\x15\n\x11join_type_unknown\x10\x00\x12\t\n\x05inner\x10\x01\x12\x0e\n\nleft_outer\x10\x02*R\n\x10RelationshipType\x12\x1d\n\x19relationship_type_unknown\x10\x00\x12\x0e\n\none_to_one\x10\x01\x12\x0f\n\x0bmany_to_one\x10\x02:4\n\x08optional\x12\x1d.google.protobuf.FieldOptions\x18\xa2\x90\x03 \x01(\x08\x88\x01\x01::\n\x0esql_expression\x12\x1d.google.protobuf.FieldOptions\x18\xa3\x90\x03 \x01(\x08\x88\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14semantic_model.proto\x12\x18semantic_model_generator\x1a google/protobuf/descriptor.proto\"/\n\x0fRetrievalResult\x12\r\n\x05value\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\"\xcb\x03\n\x06\x43olumn\x12\x12\n\x04name\x18\x01 \x01(\tB\x04\xa0\x82\x19\x01\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12\x32\n\x04kind\x18\x06 \x01(\x0e\x32$.semantic_model_generator.ColumnKind\x12\x14\n\x06unique\x18\x07 \x01(\x08\x42\x04\x90\x82\x19\x01\x12L\n\x13\x64\x65\x66\x61ult_aggregation\x18\x08 \x01(\x0e\x32).semantic_model_generator.AggregationTypeB\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\t \x03(\tB\x04\x90\x82\x19\x01\x12\'\n\x19index_and_retrieve_values\x18\n \x01(\x08\x42\x04\x90\x82\x19\x01\x12K\n\x12retrieved_literals\x18\x0b \x03(\x0b\x32).semantic_model_generator.RetrievalResultB\x04\x90\x82\x19\x01\x12(\n\x1a\x63ortex_search_service_name\x18\x0c \x01(\tB\x04\x90\x82\x19\x01\"\xd6\x01\n\tDimension\x12\x12\n\x04name\x18\x01 \x01(\tB\x04\xa0\x82\x19\x01\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12\x14\n\x06unique\x18\x06 \x01(\x08\x42\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\x07 \x03(\tB\x04\x90\x82\x19\x01\x12(\n\x1a\x63ortex_search_service_name\x18\t \x01(\tB\x04\x90\x82\x19\x01\"\xb0\x01\n\rTimeDimension\x12\x12\n\x04name\x18\x01 \x01(\tB\x04\xa0\x82\x19\x01\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12\x14\n\x06unique\x18\x06 \x01(\x08\x42\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\x07 \x03(\tB\x04\x90\x82\x19\x01\"\xe2\x01\n\x07Measure\x12\x12\n\x04name\x18\x01 \x01(\tB\x04\xa0\x82\x19\x01\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x11\n\tdata_type\x18\x05 \x01(\t\x12L\n\x13\x64\x65\x66\x61ult_aggregation\x18\x06 \x01(\x0e\x32).semantic_model_generator.AggregationTypeB\x04\x90\x82\x19\x01\x12\x1b\n\rsample_values\x18\x07 \x03(\tB\x04\x90\x82\x19\x01\"b\n\x0bNamedFilter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\"F\n\x13\x46ullyQualifiedTable\x12\x10\n\x08\x64\x61tabase\x18\x01 \x01(\t\x12\x0e\n\x06schema\x18\x02 \x01(\t\x12\r\n\x05table\x18\x03 \x01(\t\"\x1d\n\nPrimaryKey\x12\x0f\n\x07\x63olumns\x18\x01 \x03(\t\"\x8b\x04\n\x05Table\x12\x12\n\x04name\x18\x01 \x01(\tB\x04\xa0\x82\x19\x01\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x41\n\nbase_table\x18\x04 \x01(\x0b\x32-.semantic_model_generator.FullyQualifiedTable\x12\x37\n\x07\x63olumns\x18\x05 \x03(\x0b\x32 .semantic_model_generator.ColumnB\x04\x90\x82\x19\x01\x12=\n\ndimensions\x18\t \x03(\x0b\x32#.semantic_model_generator.DimensionB\x04\x90\x82\x19\x01\x12\x46\n\x0ftime_dimensions\x18\n \x03(\x0b\x32\'.semantic_model_generator.TimeDimensionB\x04\x90\x82\x19\x01\x12\x39\n\x08measures\x18\x0b \x03(\x0b\x32!.semantic_model_generator.MeasureB\x04\x90\x82\x19\x01\x12?\n\x0bprimary_key\x18\x06 \x01(\x0b\x32$.semantic_model_generator.PrimaryKeyB\x04\x90\x82\x19\x01\x12<\n\x07\x66ilters\x18\x08 \x03(\x0b\x32%.semantic_model_generator.NamedFilterB\x04\x90\x82\x19\x01\"\xa2\x01\n\x06Metric\x12\x12\n\x04name\x18\x01 \x01(\tB\x04\xa0\x82\x19\x01\x12\x16\n\x08synonyms\x18\x02 \x03(\tB\x04\x90\x82\x19\x01\x12\x19\n\x0b\x64\x65scription\x18\x03 \x01(\tB\x04\x90\x82\x19\x01\x12\x12\n\x04\x65xpr\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12=\n\x06\x66ilter\x18\x05 \x01(\x0b\x32\'.semantic_model_generator.MetricsFilterB\x04\x90\x82\x19\x01\"#\n\rMetricsFilter\x12\x12\n\x04\x65xpr\x18\x01 \x01(\tB\x04\x98\x82\x19\x01\"8\n\x0bRelationKey\x12\x13\n\x0bleft_column\x18\x01 \x01(\t\x12\x14\n\x0cright_column\x18\x02 \x01(\t\"\x88\x02\n\x0cRelationship\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nleft_table\x18\x02 \x01(\t\x12\x13\n\x0bright_table\x18\x03 \x01(\t\x12\x43\n\x14relationship_columns\x18\x07 \x03(\x0b\x32%.semantic_model_generator.RelationKey\x12\x35\n\tjoin_type\x18\x05 \x01(\x0e\x32\".semantic_model_generator.JoinType\x12\x45\n\x11relationship_type\x18\x06 \x01(\x0e\x32*.semantic_model_generator.RelationshipType\"\xb0\x02\n\rSemanticModel\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x19\n\x0b\x64\x65scription\x18\x02 \x01(\tB\x04\x90\x82\x19\x01\x12/\n\x06tables\x18\x03 \x03(\x0b\x32\x1f.semantic_model_generator.Table\x12\x37\n\x07metrics\x18\x04 \x03(\x0b\x32 .semantic_model_generator.MetricB\x04\x90\x82\x19\x01\x12\x43\n\rrelationships\x18\x05 \x03(\x0b\x32&.semantic_model_generator.RelationshipB\x04\x90\x82\x19\x01\x12G\n\x10verified_queries\x18\x06 \x03(\x0b\x32\'.semantic_model_generator.VerifiedQueryB\x04\x90\x82\x19\x01\"\x9b\x01\n\rVerifiedQuery\x12\x0c\n\x04name\x18\x01 \x01(\t\x12!\n\x13semantic_model_name\x18\x02 \x01(\tB\x04\x90\x82\x19\x01\x12\x10\n\x08question\x18\x03 \x01(\t\x12\x11\n\x03sql\x18\x04 \x01(\tB\x04\x98\x82\x19\x01\x12\x19\n\x0bverified_at\x18\x05 \x01(\x03\x42\x04\x90\x82\x19\x01\x12\x19\n\x0bverified_by\x18\x06 \x01(\tB\x04\x90\x82\x19\x01\"\\\n\x17VerifiedQueryRepository\x12\x41\n\x10verified_queries\x18\x01 \x03(\x0b\x32\'.semantic_model_generator.VerifiedQuery*~\n\x0f\x41ggregationType\x12\x1c\n\x18\x61ggregation_type_unknown\x10\x00\x12\x07\n\x03sum\x10\x01\x12\x07\n\x03\x61vg\x10\x02\x12\n\n\x06median\x10\x07\x12\x07\n\x03min\x10\x03\x12\x07\n\x03max\x10\x04\x12\t\n\x05\x63ount\x10\x05\x12\x12\n\x0e\x63ount_distinct\x10\x06*U\n\nColumnKind\x12\x17\n\x13\x63olumn_kind_unknown\x10\x00\x12\r\n\tdimension\x10\x01\x12\x0b\n\x07measure\x10\x02\x12\x12\n\x0etime_dimension\x10\x03*<\n\x08JoinType\x12\x15\n\x11join_type_unknown\x10\x00\x12\t\n\x05inner\x10\x01\x12\x0e\n\nleft_outer\x10\x02*R\n\x10RelationshipType\x12\x1d\n\x19relationship_type_unknown\x10\x00\x12\x0e\n\none_to_one\x10\x01\x12\x0f\n\x0bmany_to_one\x10\x02:4\n\x08optional\x12\x1d.google.protobuf.FieldOptions\x18\xa2\x90\x03 \x01(\x08\x88\x01\x01::\n\x0esql_expression\x12\x1d.google.protobuf.FieldOptions\x18\xa3\x90\x03 \x01(\x08\x88\x01\x01:4\n\x08id_field\x12\x1d.google.protobuf.FieldOptions\x18\xa4\x90\x03 \x01(\x08\x88\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'semantic_model_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None + _globals['_COLUMN'].fields_by_name['name']._loaded_options = None + _globals['_COLUMN'].fields_by_name['name']._serialized_options = b'\240\202\031\001' _globals['_COLUMN'].fields_by_name['synonyms']._loaded_options = None _globals['_COLUMN'].fields_by_name['synonyms']._serialized_options = b'\220\202\031\001' _globals['_COLUMN'].fields_by_name['description']._loaded_options = None @@ -40,6 +42,8 @@ _globals['_COLUMN'].fields_by_name['retrieved_literals']._serialized_options = b'\220\202\031\001' _globals['_COLUMN'].fields_by_name['cortex_search_service_name']._loaded_options = None _globals['_COLUMN'].fields_by_name['cortex_search_service_name']._serialized_options = b'\220\202\031\001' + _globals['_DIMENSION'].fields_by_name['name']._loaded_options = None + _globals['_DIMENSION'].fields_by_name['name']._serialized_options = b'\240\202\031\001' _globals['_DIMENSION'].fields_by_name['synonyms']._loaded_options = None _globals['_DIMENSION'].fields_by_name['synonyms']._serialized_options = b'\220\202\031\001' _globals['_DIMENSION'].fields_by_name['description']._loaded_options = None @@ -52,6 +56,8 @@ _globals['_DIMENSION'].fields_by_name['sample_values']._serialized_options = b'\220\202\031\001' _globals['_DIMENSION'].fields_by_name['cortex_search_service_name']._loaded_options = None _globals['_DIMENSION'].fields_by_name['cortex_search_service_name']._serialized_options = b'\220\202\031\001' + _globals['_TIMEDIMENSION'].fields_by_name['name']._loaded_options = None + _globals['_TIMEDIMENSION'].fields_by_name['name']._serialized_options = b'\240\202\031\001' _globals['_TIMEDIMENSION'].fields_by_name['synonyms']._loaded_options = None _globals['_TIMEDIMENSION'].fields_by_name['synonyms']._serialized_options = b'\220\202\031\001' _globals['_TIMEDIMENSION'].fields_by_name['description']._loaded_options = None @@ -62,6 +68,8 @@ _globals['_TIMEDIMENSION'].fields_by_name['unique']._serialized_options = b'\220\202\031\001' _globals['_TIMEDIMENSION'].fields_by_name['sample_values']._loaded_options = None _globals['_TIMEDIMENSION'].fields_by_name['sample_values']._serialized_options = b'\220\202\031\001' + _globals['_MEASURE'].fields_by_name['name']._loaded_options = None + _globals['_MEASURE'].fields_by_name['name']._serialized_options = b'\240\202\031\001' _globals['_MEASURE'].fields_by_name['synonyms']._loaded_options = None _globals['_MEASURE'].fields_by_name['synonyms']._serialized_options = b'\220\202\031\001' _globals['_MEASURE'].fields_by_name['description']._loaded_options = None @@ -78,6 +86,8 @@ _globals['_NAMEDFILTER'].fields_by_name['description']._serialized_options = b'\220\202\031\001' _globals['_NAMEDFILTER'].fields_by_name['expr']._loaded_options = None _globals['_NAMEDFILTER'].fields_by_name['expr']._serialized_options = b'\230\202\031\001' + _globals['_TABLE'].fields_by_name['name']._loaded_options = None + _globals['_TABLE'].fields_by_name['name']._serialized_options = b'\240\202\031\001' _globals['_TABLE'].fields_by_name['synonyms']._loaded_options = None _globals['_TABLE'].fields_by_name['synonyms']._serialized_options = b'\220\202\031\001' _globals['_TABLE'].fields_by_name['description']._loaded_options = None @@ -94,6 +104,8 @@ _globals['_TABLE'].fields_by_name['primary_key']._serialized_options = b'\220\202\031\001' _globals['_TABLE'].fields_by_name['filters']._loaded_options = None _globals['_TABLE'].fields_by_name['filters']._serialized_options = b'\220\202\031\001' + _globals['_METRIC'].fields_by_name['name']._loaded_options = None + _globals['_METRIC'].fields_by_name['name']._serialized_options = b'\240\202\031\001' _globals['_METRIC'].fields_by_name['synonyms']._loaded_options = None _globals['_METRIC'].fields_by_name['synonyms']._serialized_options = b'\220\202\031\001' _globals['_METRIC'].fields_by_name['description']._loaded_options = None @@ -120,44 +132,44 @@ _globals['_VERIFIEDQUERY'].fields_by_name['verified_at']._serialized_options = b'\220\202\031\001' _globals['_VERIFIEDQUERY'].fields_by_name['verified_by']._loaded_options = None _globals['_VERIFIEDQUERY'].fields_by_name['verified_by']._serialized_options = b'\220\202\031\001' - _globals['_AGGREGATIONTYPE']._serialized_start=2999 - _globals['_AGGREGATIONTYPE']._serialized_end=3125 - _globals['_COLUMNKIND']._serialized_start=3127 - _globals['_COLUMNKIND']._serialized_end=3212 - _globals['_JOINTYPE']._serialized_start=3214 - _globals['_JOINTYPE']._serialized_end=3274 - _globals['_RELATIONSHIPTYPE']._serialized_start=3276 - _globals['_RELATIONSHIPTYPE']._serialized_end=3358 + _globals['_AGGREGATIONTYPE']._serialized_start=3035 + _globals['_AGGREGATIONTYPE']._serialized_end=3161 + _globals['_COLUMNKIND']._serialized_start=3163 + _globals['_COLUMNKIND']._serialized_end=3248 + _globals['_JOINTYPE']._serialized_start=3250 + _globals['_JOINTYPE']._serialized_end=3310 + _globals['_RELATIONSHIPTYPE']._serialized_start=3312 + _globals['_RELATIONSHIPTYPE']._serialized_end=3394 _globals['_RETRIEVALRESULT']._serialized_start=84 _globals['_RETRIEVALRESULT']._serialized_end=131 _globals['_COLUMN']._serialized_start=134 - _globals['_COLUMN']._serialized_end=587 - _globals['_DIMENSION']._serialized_start=590 - _globals['_DIMENSION']._serialized_end=798 - _globals['_TIMEDIMENSION']._serialized_start=801 - _globals['_TIMEDIMENSION']._serialized_end=971 - _globals['_MEASURE']._serialized_start=974 - _globals['_MEASURE']._serialized_end=1194 - _globals['_NAMEDFILTER']._serialized_start=1196 - _globals['_NAMEDFILTER']._serialized_end=1294 - _globals['_FULLYQUALIFIEDTABLE']._serialized_start=1296 - _globals['_FULLYQUALIFIEDTABLE']._serialized_end=1366 - _globals['_PRIMARYKEY']._serialized_start=1368 - _globals['_PRIMARYKEY']._serialized_end=1397 - _globals['_TABLE']._serialized_start=1400 - _globals['_TABLE']._serialized_end=1917 - _globals['_METRIC']._serialized_start=1920 - _globals['_METRIC']._serialized_end=2076 - _globals['_METRICSFILTER']._serialized_start=2078 - _globals['_METRICSFILTER']._serialized_end=2113 - _globals['_RELATIONKEY']._serialized_start=2115 - _globals['_RELATIONKEY']._serialized_end=2171 - _globals['_RELATIONSHIP']._serialized_start=2174 - _globals['_RELATIONSHIP']._serialized_end=2438 - _globals['_SEMANTICMODEL']._serialized_start=2441 - _globals['_SEMANTICMODEL']._serialized_end=2745 - _globals['_VERIFIEDQUERY']._serialized_start=2748 - _globals['_VERIFIEDQUERY']._serialized_end=2903 - _globals['_VERIFIEDQUERYREPOSITORY']._serialized_start=2905 - _globals['_VERIFIEDQUERYREPOSITORY']._serialized_end=2997 + _globals['_COLUMN']._serialized_end=593 + _globals['_DIMENSION']._serialized_start=596 + _globals['_DIMENSION']._serialized_end=810 + _globals['_TIMEDIMENSION']._serialized_start=813 + _globals['_TIMEDIMENSION']._serialized_end=989 + _globals['_MEASURE']._serialized_start=992 + _globals['_MEASURE']._serialized_end=1218 + _globals['_NAMEDFILTER']._serialized_start=1220 + _globals['_NAMEDFILTER']._serialized_end=1318 + _globals['_FULLYQUALIFIEDTABLE']._serialized_start=1320 + _globals['_FULLYQUALIFIEDTABLE']._serialized_end=1390 + _globals['_PRIMARYKEY']._serialized_start=1392 + _globals['_PRIMARYKEY']._serialized_end=1421 + _globals['_TABLE']._serialized_start=1424 + _globals['_TABLE']._serialized_end=1947 + _globals['_METRIC']._serialized_start=1950 + _globals['_METRIC']._serialized_end=2112 + _globals['_METRICSFILTER']._serialized_start=2114 + _globals['_METRICSFILTER']._serialized_end=2149 + _globals['_RELATIONKEY']._serialized_start=2151 + _globals['_RELATIONKEY']._serialized_end=2207 + _globals['_RELATIONSHIP']._serialized_start=2210 + _globals['_RELATIONSHIP']._serialized_end=2474 + _globals['_SEMANTICMODEL']._serialized_start=2477 + _globals['_SEMANTICMODEL']._serialized_end=2781 + _globals['_VERIFIEDQUERY']._serialized_start=2784 + _globals['_VERIFIEDQUERY']._serialized_end=2939 + _globals['_VERIFIEDQUERYREPOSITORY']._serialized_start=2941 + _globals['_VERIFIEDQUERYREPOSITORY']._serialized_end=3033 # @@protoc_insertion_point(module_scope) diff --git a/semantic_model_generator/protos/semantic_model_pb2.pyi b/semantic_model_generator/protos/semantic_model_pb2.pyi index 9eae02f2..78ef6259 100644 --- a/semantic_model_generator/protos/semantic_model_pb2.pyi +++ b/semantic_model_generator/protos/semantic_model_pb2.pyi @@ -58,6 +58,8 @@ OPTIONAL_FIELD_NUMBER: _ClassVar[int] optional: _descriptor.FieldDescriptor SQL_EXPRESSION_FIELD_NUMBER: _ClassVar[int] sql_expression: _descriptor.FieldDescriptor +ID_FIELD_FIELD_NUMBER: _ClassVar[int] +id_field: _descriptor.FieldDescriptor class RetrievalResult(_message.Message): __slots__ = ("value", "score") From 8209279f0f9f99bda15717c2d2fbb3a4a15e56e5 Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Thu, 26 Sep 2024 10:24:07 -0700 Subject: [PATCH 2/4] sync schema py changes --- semantic_model_generator/validate/keywords.py | 94 +++++++++++++++++++ semantic_model_generator/validate/schema.py | 21 +++++ 2 files changed, 115 insertions(+) create mode 100644 semantic_model_generator/validate/keywords.py diff --git a/semantic_model_generator/validate/keywords.py b/semantic_model_generator/validate/keywords.py new file mode 100644 index 00000000..785de7d3 --- /dev/null +++ b/semantic_model_generator/validate/keywords.py @@ -0,0 +1,94 @@ +# https://docs.snowflake.com/en/sql-reference/reserved-keywords +SF_RESERVED_WORDS = { + "ACCOUNT", + "ALL", + "ALTER", + "AND", + "ANY", + "AS", + "BETWEEN", + "BY", + "CASE", + "CAST", + "CHECK", + "COLUMN", + "CONNECT", + "CONNECTION", + "CONSTRAINT", + "CREATE", + "CROSS", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "CURRENT_USER", + "DATABASE", + "DELETE", + "DISTINCT", + "DROP", + "ELSE", + "EXISTS", + "FALSE", + "FOLLOWING", + "FOR", + "FROM", + "FULL", + "GRANT", + "GROUP", + "GSCLUSTER", + "HAVING", + "ILIKE", + "IN", + "INCREMENT", + "INNER", + "INSERT", + "INTERSECT", + "INTO", + "IS", + "ISSUE", + "JOIN", + "LATERAL", + "LEFT", + "LIKE", + "LOCALTIME", + "LOCALTIMESTAMP", + "MINUS", + "NATURAL", + "NOT", + "NULL", + "OF", + "ON", + "OR", + "ORDER", + "ORGANIZATION", + "QUALIFY", + "REGEXP", + "REVOKE", + "RIGHT", + "RLIKE", + "ROW", + "ROWS", + "SAMPLE", + "SCHEMA", + "SELECT", + "SET", + "SOME", + "START", + "TABLE", + "TABLESAMPLE", + "THEN", + "TO", + "TRIGGER", + "TRUE", + "TRY_CAST", + "UNION", + "UNIQUE", + "UPDATE", + "USING", + "VALUES", + "VIEW", + "WHEN", + "WHENEVER", + "WHERE", + "WITH", +} diff --git a/semantic_model_generator/validate/schema.py b/semantic_model_generator/validate/schema.py index cf5abcd6..4a9ea26d 100644 --- a/semantic_model_generator/validate/schema.py +++ b/semantic_model_generator/validate/schema.py @@ -23,6 +23,7 @@ ) from semantic_model_generator.protos import semantic_model_pb2 +from semantic_model_generator.validate.keywords import SF_RESERVED_WORDS scalar_type_map = { FieldDescriptor.TYPE_BOOL: Bool, @@ -43,6 +44,18 @@ def validate_scalar(self, chunk): # type: ignore return chunk.contents +class IdField(Str): # type: ignore + def validate_scalar(self, chunk): # type: ignore + if not chunk.contents.replace("_", "").replace("$", "").isalnum(): + chunk.expecting_but_found( + "", + "name can only contain letters, underscores, decimal digits (0-9), and dollar signs ($).", + ) + if chunk.contents.upper() in SF_RESERVED_WORDS: + chunk.expecting_but_found("", "name cannot be a Snowflake reserved keyword") + return chunk.contents + + class VerifiedQueries(Seq): # type: ignore """ Validator for the verified_queries field. @@ -94,6 +107,10 @@ def create_schema_for_field( field_descriptor ): field_type = SqlExpression() + elif field_descriptor.type == FieldDescriptor.TYPE_STRING and _is_id_field( + field_descriptor + ): + field_type = IdField() elif field_descriptor.type in scalar_type_map: field_type = scalar_type_map[field_descriptor.type]() else: @@ -116,6 +133,10 @@ def _is_sql_expression(field_descriptor: FieldDescriptor) -> bool: return _has_field_option(field_descriptor, "sql_expression") +def _is_id_field(field_descriptor: FieldDescriptor) -> bool: + return _has_field_option(field_descriptor, "id_field") + + def _has_field_option(field_descriptor: FieldDescriptor, option_name: str) -> bool: option = list( filter( From 7bce229331e82a0e75a532d60d3e97150345050f Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Thu, 26 Sep 2024 10:29:25 -0700 Subject: [PATCH 3/4] fix test --- semantic_model_generator/tests/validate_model_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/semantic_model_generator/tests/validate_model_test.py b/semantic_model_generator/tests/validate_model_test.py index bd070da0..4191a3be 100644 --- a/semantic_model_generator/tests/validate_model_test.py +++ b/semantic_model_generator/tests/validate_model_test.py @@ -209,12 +209,10 @@ def test_invalid_yaml_missing_quote( mock_logger, temp_invalid_yaml_unmatched_quote_file, mock_snowflake_connection ): account_name = "snowflake test" - with pytest.raises(ValueError) as exc_info: + with pytest.raises(YAMLValidationError) as exc_info: validate_from_local_path(temp_invalid_yaml_unmatched_quote_file, account_name) - expected_error_fragment = ( - "Unable to validate your semantic model. Error = Unable to parse sql statement." - ) + expected_error_fragment = "name can only contain letters, underscores, decimal digits (0-9), and dollar signs ($)." assert expected_error_fragment in str(exc_info.value), "Unexpected error message" expected_log_call = mock.call.info("Successfully validated!") From 7ce4da6d97b28faefe80537574b3cb45cd23a1d1 Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Thu, 26 Sep 2024 10:35:48 -0700 Subject: [PATCH 4/4] re add comment --- semantic_model_generator/protos/semantic_model.proto | 1 + 1 file changed, 1 insertion(+) diff --git a/semantic_model_generator/protos/semantic_model.proto b/semantic_model_generator/protos/semantic_model.proto index 1a0095ce..cc2d27e7 100644 --- a/semantic_model_generator/protos/semantic_model.proto +++ b/semantic_model_generator/protos/semantic_model.proto @@ -267,6 +267,7 @@ message Relationship { string right_table = 3; // Keys directly represent the join relationship. repeated RelationKey relationship_columns = 7; + // Type of the join. JoinType join_type = 5; // Type of the relationship. RelationshipType relationship_type = 6;