Merge pull request #164 from fivetran/bug/missing-sla-policies

bug/missing-sla-policies
fivetran · Aug 30, 2024 · 66d9622 · 66d9622
2 parents 031e845 + 0c382e5
commit 66d9622
Show file tree

Hide file tree

Showing 12 changed files with 168 additions and 66 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -58,7 +58,7 @@ steps:
     commands: |
       bash .buildkite/scripts/run_models.sh redshift
 
-  - label: ":bricks: Run Tests - Databricks"
+  - label: ":databricks: Run Tests - Databricks"
     key: "run_dbt_databricks"
     plugins:
       - docker#v3.13.0:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,14 +1,24 @@
 # dbt_zendesk v0.17.0
 
 ## Breaking Changes (Full refresh required after upgrading)
-- Incremental models running on BigQuery have had the `partition_by` logic adjusted to include a granularity of a month. This change only impacts BigQuery warehouses and was applied to avoid the common `too many partitions` error some users have experienced when partitioning by day. Therefore, adjusting the partition to a month granularity will decrease the number of partitions created and allow for more performant querying and incremental loads. This change was applied to the following models:
+- Incremental models running on BigQuery have had the `partition_by` logic adjusted to include a granularity of a month. This change only impacts BigQuery warehouses and was applied to avoid the common `too many partitions` error some users have experienced when partitioning by day. Therefore, adjusting the partition to a month granularity will decrease the number of partitions created and allow for more performant querying and incremental loads. This change was applied to the following models ([#165](https://github.com/fivetran/dbt_zendesk/pull/165)):
   - `int_zendesk__field_calendar_spine`
   - `int_zendesk__field_history_pivot`
   - `zendesk__ticket_field_history`
 
-## Under the Hood
-- Updated seed files to reflect a real world ticket field history update scenario.
-- Modified the `consistency_sla_policy_count` validation test to group by `ticket_id` for more accurate testing.
+## Bug Fixes
+- Fixed an issue in the `zendesk__sla_policies` model where tickets that were opened and solved outside of scheduled hours were not being reported, specifically for the metrics `requester_wait_time` and `agent_work_time`. 
+  - Resolved by adjusting the join logic in models `int_zendesk__agent_work_time_business_hours` and `int_zendesk__requester_wait_time_business_hours`. ([#164](https://github.com/fivetran/dbt_zendesk/pull/164), [#156](https://github.com/fivetran/dbt_zendesk/pull/156))
+- Fixed an issue in the `zendesk__ticket_metrics` model where certain tickets had miscalculated metrics.
+  - Resolved by adjusting the join logic in models `int_zendesk__ticket_work_time_business`, `int_zendesk__ticket_first_resolution_time_business`, and `int_zendesk__ticket_full_resolution_time_business`. ([#167](https://github.com/fivetran/dbt_zendesk/pull/167))
+
+## Under the Hood
+- Added integrity validations:
+  - Test to ensure `zendesk__sla_policies` and `zendesk__ticket_metrics` models produce consistent time results. ([#164](https://github.com/fivetran/dbt_zendesk/pull/164))
+  - Test to ensure `zendesk__ticket_metrics` contains all the tickets found in `stg_zendesk__ticket`.
+- Modified the `consistency_sla_policy_count` validation test to group by `ticket_id` for more accurate testing. ([#165](https://github.com/fivetran/dbt_zendesk/pull/165))
+- Reduced the weeks looking ahead from 208 to 52 to improve performance, as tracking ticket SLAs beyond one year was unnecessary. ([#156](https://github.com/fivetran/dbt_zendesk/pull/156), [#167](https://github.com/fivetran/dbt_zendesk/pull/167))
+- Updated seed files to reflect a real world ticket field history update scenario. ([#165](https://github.com/fivetran/dbt_zendesk/pull/165))
 
 # dbt_zendesk v0.16.0
 ## 🚨 Minor Upgrade 🚨

diff --git a/integration_tests/tests/consistency/consistency_sla_policies.sql b/integration_tests/tests/consistency/consistency_sla_policies.sql
@@ -5,43 +5,63 @@
 ) }}
 
 with prod as (
-    select
+    select 
         ticket_id,
-        metric, 
+        sla_policy_name,
+        metric,
         sla_applied_at,
-        sla_elapsed_time,
+        target,
+        in_business_hours,
+        sla_breach_at,
+        round(sla_elapsed_time, -1) as sla_elapsed_time, --round to the nearest tens
+        is_active_sla,
         is_sla_breach
     from {{ target.schema }}_zendesk_prod.zendesk__sla_policies
 ),
 
 dev as (
     select
         ticket_id,
-        metric, 
+        sla_policy_name,
+        metric,
         sla_applied_at,
-        sla_elapsed_time,
+        target,
+        in_business_hours,
+        sla_breach_at,
+        round(sla_elapsed_time, -1) as sla_elapsed_time, --round to the nearest tens
+        is_active_sla,
         is_sla_breach
     from {{ target.schema }}_zendesk_dev.zendesk__sla_policies
 ),
 
+prod_not_in_dev as (
+    -- rows from prod not found in dev; dbt.except() renders the adapter-appropriate set-difference keyword
+    select * from prod
+    {{ dbt.except() }}
+    select * from dev
+),
+
+dev_not_in_prod as (
+    -- rows from dev not found in prod (same comparison as above, in the opposite direction)
+    select * from dev
+    {{ dbt.except() }}
+    select * from prod
+),
+
 final as (
-    select 
-        prod.ticket_id,
-        prod.metric,
-        prod.sla_applied_at,
-        prod.sla_elapsed_time as prod_sla_elapsed_time,
-        dev.sla_elapsed_time as dev_sla_elapsed_time,
-        prod.is_sla_breach as prod_is_sla_breach,
-        dev.is_sla_breach as dev_is_sla_breach
-    from prod
-    full outer join dev 
-        on dev.ticket_id = prod.ticket_id
-            and dev.metric = prod.metric
-            and dev.sla_applied_at = prod.sla_applied_at
+    select
+        *,
+        'from prod' as source
+    from prod_not_in_dev
+
+    union all -- union since we only care if rows are produced
+
+    select
+        *,
+        'from dev' as source
+    from dev_not_in_prod
 )
 
 select *
 from final
-where (abs(prod_sla_elapsed_time - dev_sla_elapsed_time) >= 5
-    or prod_is_sla_breach != dev_is_sla_breach)
-    {{ "and prod.ticket_id not in " ~ var('fivetran_consistency_sla_policies_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policies_exclusion_tickets',[]) }}
+{{ "where ticket_id not in " ~ var('fivetran_consistency_sla_policies_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policies_exclusion_tickets',[]) }}
diff --git a/integration_tests/tests/consistency/consistency_sla_policy_count.sql b/integration_tests/tests/consistency/consistency_sla_policy_count.sql
@@ -9,6 +9,7 @@ with prod as (
         ticket_id,
         count(*) as total_slas
     from {{ target.schema }}_zendesk_prod.zendesk__sla_policies
+    {{ "where ticket_id not in " ~ var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) }}
     group by 1
 ),
 
@@ -17,13 +18,14 @@ dev as (
         ticket_id,
         count(*) as total_slas
     from {{ target.schema }}_zendesk_dev.zendesk__sla_policies
+    {{ "where ticket_id not in " ~ var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) ~ "" if var('fivetran_consistency_sla_policy_count_exclusion_tickets',[]) }}
     group by 1
 ),
 
 final as (
     select 
-        prod.ticket_id,
-        dev.ticket_id,
+        prod.ticket_id as prod_ticket_id,
+        dev.ticket_id as dev_ticket_id,
         prod.total_slas as prod_sla_total,
         dev.total_slas as dev_sla_total
     from prod
@@ -33,4 +35,4 @@ final as (
 
 select *
 from final
-where prod_sla_total != dev_sla_total
+where prod_sla_total != dev_sla_total
diff --git a/integration_tests/tests/integrity/metrics_count_match.sql b/integration_tests/tests/integrity/metrics_count_match.sql
@@ -0,0 +1,24 @@
+
+{{ config(
+    tags="fivetran_validations",
+    enabled=var('fivetran_validation_tests_enabled', false)
+) }}
+
+-- check that all the tickets are accounted for in the metrics; the test returns a row (fails) when the two row counts differ
+with stg_count as (
+    select
+        count(*) as stg_ticket_count
+    from {{ ref('stg_zendesk__ticket') }}
+),
+
+metric_count as (
+    -- row count of the final ticket metrics model
+    select
+        count(*) as metric_ticket_count
+    from {{ ref('zendesk__ticket_metrics') }}
+)
+
+select *
+from stg_count
+join metric_count
+    on stg_ticket_count != metric_ticket_count -- each CTE yields exactly one row, so a row is produced only on a mismatch
diff --git a/integration_tests/tests/integrity/sla_metrics_parity.sql b/integration_tests/tests/integrity/sla_metrics_parity.sql
@@ -0,0 +1,36 @@
+{{ config(
+    tags="fivetran_validations",
+    enabled=var('fivetran_validation_tests_enabled', false)
+) }}
+
+/*
+This test is to ensure the sla_elapsed_time from zendesk__sla_policies matches the corresponding time in zendesk__ticket_metrics. Only SLAs flagged in_business_hours are checked, and only for the agent_work_time, requester_wait_time and first_reply_time metrics. A row is returned (test failure) when the two values differ by 5 or more.
+*/
+
+with dev_slas as (
+    select *
+    from {{ target.schema }}_zendesk_dev.zendesk__sla_policies
+    where in_business_hours -- restrict the comparison to business-hours SLAs
+
+), dev_metrics as (
+    select *
+    from {{ target.schema }}_zendesk_dev.zendesk__ticket_metrics
+
+), dev_compare as (
+    select 
+        dev_slas.ticket_id,
+        dev_slas.metric,
+        cast(dev_slas.sla_elapsed_time as {{ dbt.type_int() }}) as time_from_slas,
+        case when metric = 'agent_work_time' then dev_metrics.agent_work_time_in_business_minutes -- pick the ticket_metrics column that corresponds to the SLA metric
+            when metric = 'requester_wait_time' then dev_metrics.requester_wait_time_in_business_minutes
+            when metric = 'first_reply_time' then dev_metrics.first_reply_time_business_minutes
+        end as time_from_metrics -- no else branch: any other metric yields null and is dropped by the final filter
+    from dev_slas
+    left join dev_metrics
+        on dev_metrics.ticket_id = dev_slas.ticket_id
+)
+
+select *
+from dev_compare
+where abs(time_from_slas - time_from_metrics) >= 5 -- differences below 5 are tolerated; rows with a null on either side never satisfy this predicate
+{{ "and ticket_id not in " ~ var('fivetran_integrity_sla_metric_parity_exclusion_tickets',[]) ~ "" if var('fivetran_integrity_sla_metric_parity_exclusion_tickets',[]) }}
diff --git a/models/agent_work_time/int_zendesk__ticket_work_time_business.sql b/models/agent_work_time/int_zendesk__ticket_work_time_business.sql
@@ -63,7 +63,7 @@ with ticket_historical_status as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_solved_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -99,14 +99,14 @@ with ticket_historical_status as (
       schedule.end_time_utc as schedule_end_time,
       least(ticket_week_end_time, schedule.end_time_utc) - greatest(weekly_periods.ticket_week_start_time, schedule.start_time_utc) as scheduled_minutes
     from weekly_periods
-    join schedule on 
-      ticket_week_start_time <= schedule.end_time_utc 
+    join schedule
+      on ticket_week_start_time <= schedule.end_time_utc 
       and ticket_week_end_time >= schedule.start_time_utc
       and weekly_periods.schedule_id = schedule.schedule_id
       -- this chooses the Daylight Savings Time or Standard Time version of the schedule
       -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 ), business_minutes as (
 

diff --git a/models/reply_times/int_zendesk__ticket_first_reply_time_business.sql b/models/reply_times/int_zendesk__ticket_first_reply_time_business.sql
@@ -57,7 +57,7 @@ with ticket_reply_times as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_first_reply as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -102,6 +102,6 @@ with ticket_reply_times as (
 )
 
   select ticket_id,
-         sum(scheduled_minutes) as first_reply_time_business_minutes
+        sum(scheduled_minutes) as first_reply_time_business_minutes
   from intercepted_periods
   group by 1
diff --git a/models/resolution_times/int_zendesk__ticket_first_resolution_time_business.sql b/models/resolution_times/int_zendesk__ticket_first_resolution_time_business.sql
@@ -46,7 +46,7 @@ with ticket_resolution_times_calendar as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_first_resolution_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -72,22 +72,24 @@ with ticket_resolution_times_calendar as (
 
 ), intercepted_periods as (
 
-  select ticket_id,
-         week_number,
-         weekly_periods.schedule_id,
-         ticket_week_start_time,
-         ticket_week_end_time,
-         schedule.start_time_utc as schedule_start_time,
-         schedule.end_time_utc as schedule_end_time,
-         least(ticket_week_end_time, schedule.end_time_utc) - greatest(ticket_week_start_time, schedule.start_time_utc) as scheduled_minutes
+  select 
+    ticket_id,
+    week_number,
+    weekly_periods.schedule_id,
+    ticket_week_start_time,
+    ticket_week_end_time,
+    schedule.start_time_utc as schedule_start_time,
+    schedule.end_time_utc as schedule_end_time,
+    least(ticket_week_end_time, schedule.end_time_utc) - greatest(ticket_week_start_time, schedule.start_time_utc) as scheduled_minutes
   from weekly_periods
-  join schedule on ticket_week_start_time <= schedule.end_time_utc 
+  join schedule
+    on ticket_week_start_time <= schedule.end_time_utc 
     and ticket_week_end_time >= schedule.start_time_utc
     and weekly_periods.schedule_id = schedule.schedule_id
     -- this chooses the Daylight Savings Time or Standard Time version of the schedule
     -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 )
 

diff --git a/models/resolution_times/int_zendesk__ticket_full_resolution_time_business.sql b/models/resolution_times/int_zendesk__ticket_full_resolution_time_business.sql
@@ -45,7 +45,7 @@ with ticket_resolution_times_calendar as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_resolution_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -85,8 +85,8 @@ with ticket_resolution_times_calendar as (
     and weekly_periods.schedule_id = schedule.schedule_id
     -- this chooses the Daylight Savings Time or Standard Time version of the schedule
     -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+    and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 )
 

diff --git a/models/sla_policy/agent_work_time/int_zendesk__agent_work_time_business_hours.sql b/models/sla_policy/agent_work_time/int_zendesk__agent_work_time_business_hours.sql
@@ -75,7 +75,7 @@ with agent_work_time_filtered_statuses as (
 
 ), weeks as (
 
-    {{ dbt_utils.generate_series(208) }}
+    {{ dbt_utils.generate_series(52) }}
 
 ), weeks_cross_ticket_full_solved_time as (
     -- because time is reported in minutes since the beginning of the week, we have to split up time spent on the ticket into calendar weeks
@@ -120,17 +120,21 @@ with agent_work_time_filtered_statuses as (
       weekly_period_agent_work_time.week_number,
       weekly_period_agent_work_time.ticket_week_start_time_minute,
       weekly_period_agent_work_time.ticket_week_end_time_minute,
-      schedule.start_time_utc as schedule_start_time,
+      coalesce(schedule.start_time_utc, 0) as schedule_start_time,
       schedule.end_time_utc as schedule_end_time,
-      least(ticket_week_end_time_minute, schedule.end_time_utc) - greatest(weekly_period_agent_work_time.ticket_week_start_time_minute, schedule.start_time_utc) as scheduled_minutes
+      coalesce(
+        least(ticket_week_end_time_minute, schedule.end_time_utc)
+        - greatest(weekly_period_agent_work_time.ticket_week_start_time_minute, schedule.start_time_utc),
+        0) as scheduled_minutes
     from weekly_period_agent_work_time
-    join schedule on ticket_week_start_time_minute <= schedule.end_time_utc 
+    left join schedule
+      on ticket_week_start_time_minute <= schedule.end_time_utc 
       and ticket_week_end_time_minute >= schedule.start_time_utc
       and weekly_period_agent_work_time.schedule_id = schedule.schedule_id
       -- this chooses the Daylight Savings Time or Standard Time version of the schedule
       -- We have everything calculated within a week, so take us to the appropriate week first by adding the week_number * minutes-in-a-week to the minute-mark where we start and stop counting for the week
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time_minute', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) > cast(schedule.valid_from as {{ dbt.type_timestamp() }})
-      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time_minute', from_date_or_timestamp='start_week_date') }} as {{ dbt.type_timestamp() }}) < cast(schedule.valid_until as {{ dbt.type_timestamp() }})
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_end_time_minute', from_date_or_timestamp='start_week_date') }} as date) > cast(schedule.valid_from as date)
+      and cast( {{ dbt.dateadd(datepart='minute', interval='week_number * (7*24*60) + ticket_week_start_time_minute', from_date_or_timestamp='start_week_date') }} as date) < cast(schedule.valid_until as date)
 
 ), intercepted_periods_with_running_total as (
 
@@ -152,7 +156,7 @@ with agent_work_time_filtered_statuses as (
     lag(target - running_total_scheduled_minutes) over
           (partition by ticket_id, sla_applied_at order by valid_starting_at, week_number, schedule_end_time) as lag_check,
     case when (target - running_total_scheduled_minutes) = 0 then true
-       when (target - running_total_scheduled_minutes) < 0 
+      when (target - running_total_scheduled_minutes) < 0 
         and 
           (lag(target - running_total_scheduled_minutes) over
           (partition by ticket_id, sla_applied_at order by valid_starting_at, week_number, schedule_end_time) > 0