-
Notifications
You must be signed in to change notification settings - Fork 506
/
Copy pathequality.sql
166 lines (123 loc) · 5.98 KB
/
equality.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
{% test equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None) %}
{{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, exclude_columns, precision)) }}
{% endtest %}
{% macro default__test_equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None) %}
{%- if compare_columns and exclude_columns -%}
{{ exceptions.raise_compiler_error("Both a compare and an ignore list were provided to the `equality` macro. Only one is allowed") }}
{%- endif -%}
{% set set_diff %}
count(*) + coalesce(abs(
sum(case when which_diff = 'a_minus_b' then 1 else 0 end) -
sum(case when which_diff = 'b_minus_a' then 1 else 0 end)
), 0)
{% endset %}
{#-- Needs to be set at parse time, before we return '' below --#}
{{ config(fail_calc = set_diff) }}
{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
{%- if not execute -%}
{{ return('') }}
{% endif %}
-- setup
{%- do dbt_utils._is_relation(model, 'test_equality') -%}
{# Ensure there are no extra columns in the compare_model vs model #}
{%- if not compare_columns -%}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- do dbt_utils._is_ephemeral(compare_model, 'test_equality') -%}
{%- set model_columns = adapter.get_columns_in_relation(model) -%}
{%- set compare_model_columns = adapter.get_columns_in_relation(compare_model) -%}
{%- if exclude_columns -%}
{#-- Lower case ignore columns for easier comparison --#}
{%- set exclude_columns = exclude_columns | map("lower") | list %}
{# Filter out the excluded columns #}
{%- set include_columns = [] %}
{%- set include_model_columns = [] %}
{%- for column in model_columns -%}
{%- if column.name | lower not in exclude_columns -%}
{% do include_columns.append(column) %}
{%- endif %}
{%- endfor %}
{%- for column in compare_model_columns -%}
{%- if column.name | lower not in exclude_columns -%}
{% do include_model_columns.append(column) %}
{%- endif %}
{%- endfor %}
{%- set compare_columns_set = set(include_columns | map(attribute='quoted') | map("lower")) %}
{%- set compare_model_columns_set = set(include_model_columns | map(attribute='quoted') | map("lower")) %}
{%- else -%}
{%- set compare_columns_set = set(model_columns | map(attribute='quoted') | map("lower")) %}
{%- set compare_model_columns_set = set(compare_model_columns | map(attribute='quoted') | map("lower")) %}
{%- endif -%}
{% if compare_columns_set != compare_model_columns_set %}
{{ exceptions.raise_compiler_error(compare_model ~" has less columns than " ~ model ~ ", please ensure they have the same columns or use the `compare_columns` or `exclude_columns` arguments to subset them.") }}
{% endif %}
{% endif %}
{%- if not precision -%}
{%- if not compare_columns -%}
{#
You cannot get the columns in an ephemeral model (due to not existing in the information schema),
so if the user does not provide an explicit list of columns we must error in the case it is ephemeral
#}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set compare_columns = adapter.get_columns_in_relation(model)-%}
{%- if exclude_columns -%}
{#-- Lower case ignore columns for easier comparison --#}
{%- set exclude_columns = exclude_columns | map("lower") | list %}
{# Filter out the excluded columns #}
{%- set include_columns = [] %}
{%- for column in compare_columns -%}
{%- if column.name | lower not in exclude_columns -%}
{% do include_columns.append(column) %}
{%- endif %}
{%- endfor %}
{%- set compare_columns = include_columns | map(attribute='quoted') %}
{%- else -%} {# Compare columns provided #}
{%- set compare_columns = compare_columns | map(attribute='quoted') %}
{%- endif -%}
{%- endif -%}
{% set compare_cols_csv = compare_columns | join(', ') %}
{% else %} {# Precision required #}
{#-
If rounding is required, we need to get the types, so it cannot be ephemeral even if they provide column names
-#}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set columns = adapter.get_columns_in_relation(model) -%}
{% set columns_list = [] %}
{%- for col in columns -%}
{%- if (
(col.name|lower in compare_columns|map('lower') or not compare_columns) and
(col.name|lower not in exclude_columns|map('lower') or not exclude_columns)
) -%}
{# Databricks double type is not picked up by any number type checks in dbt #}
{%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%}
{# Cast is required due to postgres not having round for a double precision number #}
{%- do columns_list.append('round(cast(' ~ col.quoted ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.quoted) -%}
{%- else -%} {# Non-numeric type #}
{%- do columns_list.append(col.quoted) -%}
{%- endif -%}
{% endif %}
{%- endfor -%}
{% set compare_cols_csv = columns_list | join(', ') %}
{% endif %}
with a as (
select * from {{ model }}
),
b as (
select * from {{ compare_model }}
),
a_minus_b as (
select {{compare_cols_csv}} from a
{{ dbt.except() }}
select {{compare_cols_csv}} from b
),
b_minus_a as (
select {{compare_cols_csv}} from b
{{ dbt.except() }}
select {{compare_cols_csv}} from a
),
unioned as (
select 'a_minus_b' as which_diff, a_minus_b.* from a_minus_b
union all
select 'b_minus_a' as which_diff, b_minus_a.* from b_minus_a
)
select * from unioned
{% endmacro %}