From b3e0f8e34181b9b716c8472b49696edeafed2e6d Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Wed, 7 Dec 2022 16:52:22 +0900 Subject: [PATCH 1/7] Rename yml/md files and add docs --- jaffle_shop/macros/sensitive/{macros.md => _macros.md} | 0 jaffle_shop/macros/sensitive/{macros.yml => _macros.yml} | 0 jaffle_shop/models/_models.md | 3 +++ jaffle_shop/models/{schema.yml => _models.yml} | 0 jaffle_shop/models/staging/{schema.yml => _staging.yml} | 0 5 files changed, 3 insertions(+) rename jaffle_shop/macros/sensitive/{macros.md => _macros.md} (100%) rename jaffle_shop/macros/sensitive/{macros.yml => _macros.yml} (100%) create mode 100644 jaffle_shop/models/_models.md rename jaffle_shop/models/{schema.yml => _models.yml} (100%) rename jaffle_shop/models/staging/{schema.yml => _staging.yml} (100%) diff --git a/jaffle_shop/macros/sensitive/macros.md b/jaffle_shop/macros/sensitive/_macros.md similarity index 100% rename from jaffle_shop/macros/sensitive/macros.md rename to jaffle_shop/macros/sensitive/_macros.md diff --git a/jaffle_shop/macros/sensitive/macros.yml b/jaffle_shop/macros/sensitive/_macros.yml similarity index 100% rename from jaffle_shop/macros/sensitive/macros.yml rename to jaffle_shop/macros/sensitive/_macros.yml diff --git a/jaffle_shop/models/_models.md b/jaffle_shop/models/_models.md new file mode 100644 index 000000000..93296864a --- /dev/null +++ b/jaffle_shop/models/_models.md @@ -0,0 +1,3 @@ +{% docs order_status %} +Current status of the order. Categorical with options 'placed', 'shipped', 'completed', 'return_pending', or 'returned'. 
+{% enddocs %} diff --git a/jaffle_shop/models/schema.yml b/jaffle_shop/models/_models.yml similarity index 100% rename from jaffle_shop/models/schema.yml rename to jaffle_shop/models/_models.yml diff --git a/jaffle_shop/models/staging/schema.yml b/jaffle_shop/models/staging/_staging.yml similarity index 100% rename from jaffle_shop/models/staging/schema.yml rename to jaffle_shop/models/staging/_staging.yml From fbd8c622219c99075f5e22953e9a7d64f929fe8a Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Wed, 7 Dec 2022 17:00:15 +0900 Subject: [PATCH 2/7] Add src_seed dir and _sources.yml fix --- .../{_staging.yml => src_seed/_models.yml} | 16 ++++++++++++++++ jaffle_shop/models/staging/src_seed/_sources.yml | 9 +++++++++ .../staging/{ => src_seed}/stg_customers.sql | 0 .../models/staging/{ => src_seed}/stg_orders.sql | 0 .../staging/{ => src_seed}/stg_payments.sql | 0 5 files changed, 25 insertions(+) rename jaffle_shop/models/staging/{_staging.yml => src_seed/_models.yml} (52%) create mode 100644 jaffle_shop/models/staging/src_seed/_sources.yml rename jaffle_shop/models/staging/{ => src_seed}/stg_customers.sql (100%) rename jaffle_shop/models/staging/{ => src_seed}/stg_orders.sql (100%) rename jaffle_shop/models/staging/{ => src_seed}/stg_payments.sql (100%) diff --git a/jaffle_shop/models/staging/_staging.yml b/jaffle_shop/models/staging/src_seed/_models.yml similarity index 52% rename from jaffle_shop/models/staging/_staging.yml rename to jaffle_shop/models/staging/src_seed/_models.yml index c207e4cf5..199e82005 100644 --- a/jaffle_shop/models/staging/_staging.yml +++ b/jaffle_shop/models/staging/src_seed/_models.yml @@ -2,6 +2,12 @@ version: 2 models: - name: stg_customers + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers + description: | + A version of `stg_customers_pii` with sensitive details hashed. For more details, see the PII + table. 
columns: - name: customer_id tests: @@ -9,6 +15,11 @@ models: - not_null - name: stg_orders + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers + description: | + Table of all orders made at our Jaffle Shop! columns: - name: order_id tests: @@ -20,6 +31,11 @@ models: values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] - name: stg_payments + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers + description: | + Table of all payments made to our Jaffle Shop! columns: - name: payment_id tests: diff --git a/jaffle_shop/models/staging/src_seed/_sources.yml b/jaffle_shop/models/staging/src_seed/_sources.yml new file mode 100644 index 000000000..dfbfb665b --- /dev/null +++ b/jaffle_shop/models/staging/src_seed/_sources.yml @@ -0,0 +1,9 @@ +version: 2 + +sources: + - name: src_seed + schema: jaffle_shop + tables: + - name: raw_customers + - name: raw_orders + - name: raw_payments diff --git a/jaffle_shop/models/staging/stg_customers.sql b/jaffle_shop/models/staging/src_seed/stg_customers.sql similarity index 100% rename from jaffle_shop/models/staging/stg_customers.sql rename to jaffle_shop/models/staging/src_seed/stg_customers.sql diff --git a/jaffle_shop/models/staging/stg_orders.sql b/jaffle_shop/models/staging/src_seed/stg_orders.sql similarity index 100% rename from jaffle_shop/models/staging/stg_orders.sql rename to jaffle_shop/models/staging/src_seed/stg_orders.sql diff --git a/jaffle_shop/models/staging/stg_payments.sql b/jaffle_shop/models/staging/src_seed/stg_payments.sql similarity index 100% rename from jaffle_shop/models/staging/stg_payments.sql rename to jaffle_shop/models/staging/src_seed/stg_payments.sql From 9978716480a5b2ed4816ed739e28027dc5ea18ea Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Wed, 7 Dec 2022 17:05:52 +0900 Subject: [PATCH 3/7] Manage PII for stg_customers --- 
.../models/staging/src_seed/_models.yml | 3 --- .../staging/src_seed/sensitive/_models.yml | 23 +++++++++++++++++ .../src_seed/sensitive/stg_customers_pii.sql | 22 ++++++++++++++++ .../models/staging/src_seed/stg_customers.sql | 25 +++---------------- 4 files changed, 48 insertions(+), 25 deletions(-) create mode 100644 jaffle_shop/models/staging/src_seed/sensitive/_models.yml create mode 100644 jaffle_shop/models/staging/src_seed/sensitive/stg_customers_pii.sql diff --git a/jaffle_shop/models/staging/src_seed/_models.yml b/jaffle_shop/models/staging/src_seed/_models.yml index 199e82005..eb2ec54a9 100644 --- a/jaffle_shop/models/staging/src_seed/_models.yml +++ b/jaffle_shop/models/staging/src_seed/_models.yml @@ -2,9 +2,6 @@ version: 2 models: - name: stg_customers - meta: - owner: "alex.malins@octoenergy.com" - team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers description: | A version of `stg_customers_pii` with sensitive details hashed. For more details, see the PII table. diff --git a/jaffle_shop/models/staging/src_seed/sensitive/_models.yml b/jaffle_shop/models/staging/src_seed/sensitive/_models.yml new file mode 100644 index 000000000..df34d9cac --- /dev/null +++ b/jaffle_shop/models/staging/src_seed/sensitive/_models.yml @@ -0,0 +1,23 @@ +version: 2 + +models: + - name: stg_customers_pii + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers + description: | + Table of customers of our Jaffle Shop! + + Contains PII. + columns: + - name: customer_id + description: Unique customer identifier. 
+ tests: + - unique + - not_null + - name: first_name + meta: + sensitive: true + - name: last_name + meta: + sensitive: true diff --git a/jaffle_shop/models/staging/src_seed/sensitive/stg_customers_pii.sql b/jaffle_shop/models/staging/src_seed/sensitive/stg_customers_pii.sql new file mode 100644 index 000000000..cad047269 --- /dev/null +++ b/jaffle_shop/models/staging/src_seed/sensitive/stg_customers_pii.sql @@ -0,0 +1,22 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_customers') }} + +), + +renamed as ( + + select + id as customer_id, + first_name, + last_name + + from source + +) + +select * from renamed diff --git a/jaffle_shop/models/staging/src_seed/stg_customers.sql b/jaffle_shop/models/staging/src_seed/stg_customers.sql index cad047269..7e23e42c0 100644 --- a/jaffle_shop/models/staging/src_seed/stg_customers.sql +++ b/jaffle_shop/models/staging/src_seed/stg_customers.sql @@ -1,22 +1,3 @@ -with source as ( - - {#- - Normally we would select from the table here, but we are using seeds to load - our data in this project - #} - select * from {{ ref('raw_customers') }} - -), - -renamed as ( - - select - id as customer_id, - first_name, - last_name - - from source - -) - -select * from renamed +SELECT + {{ hash_sensitive_columns("stg_customers_pii") }} +FROM {{ ref("stg_customers_pii") }} From 92c98a9079e426f27bddce1d6a64c4f616b5932a Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Thu, 8 Dec 2022 11:11:54 +0900 Subject: [PATCH 4/7] Add final tables for two dashboards --- jaffle_shop/models/final/finance/_models.yml | 28 +++++++++++++++++++ .../fnl_finance_customerlifetimereturns.sql | 6 ++++ jaffle_shop/models/final/sales/_models.yml | 28 +++++++++++++++++++ .../final/sales/fnl_sales_newcustomers.sql | 12 ++++++++ 4 files changed, 74 insertions(+) create mode 100644 
jaffle_shop/models/final/finance/_models.yml create mode 100644 jaffle_shop/models/final/finance/fnl_finance_customerlifetimereturns.sql create mode 100644 jaffle_shop/models/final/sales/_models.yml create mode 100644 jaffle_shop/models/final/sales/fnl_sales_newcustomers.sql diff --git a/jaffle_shop/models/final/finance/_models.yml b/jaffle_shop/models/final/finance/_models.yml new file mode 100644 index 000000000..3a563311b --- /dev/null +++ b/jaffle_shop/models/final/finance/_models.yml @@ -0,0 +1,28 @@ +version: 2 + +exposures: + - name: customer_lifetime_returns_value_dashboard + label: Customer lifetime returns value dashboard + description: A dashboard for the lifetime value (total amount) of returns from each customer. + type: dashboard + url: https://prod-apnortheast-a.online.tableau.com/#/site/octopusenergyjapan/home/customerlifetimereturns + owner: + email: "alex.malins@octoenergy.com" + depends_on: + - ref('fnl_finance_customerlifetimereturns') + +models: + - name: fnl_finance_customerlifetimereturns + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers + description: | + Table with the total value of returned orders for each customer. + columns: + - name: customer_id + description: Unique customer id. + tests: + - unique + - not_null + - name: customer_lifetime_returns + description: Total value of all orders returned by customer. 
diff --git a/jaffle_shop/models/final/finance/fnl_finance_customerlifetimereturns.sql b/jaffle_shop/models/final/finance/fnl_finance_customerlifetimereturns.sql new file mode 100644 index 000000000..1f424b6e9 --- /dev/null +++ b/jaffle_shop/models/final/finance/fnl_finance_customerlifetimereturns.sql @@ -0,0 +1,6 @@ +SELECT + customer_id + , SUM(amount) as customer_lifetime_returns +FROM {{ ref('wh_orders') }} +WHERE status = 'returned' +GROUP BY customer_id diff --git a/jaffle_shop/models/final/sales/_models.yml b/jaffle_shop/models/final/sales/_models.yml new file mode 100644 index 000000000..0f6131d21 --- /dev/null +++ b/jaffle_shop/models/final/sales/_models.yml @@ -0,0 +1,28 @@ +version: 2 + +exposures: + - name: new_customers_dashboard + label: Monthly new customers dashboard + description: A dashboard for the number of new customers making first orders each month. + type: dashboard + url: https://prod-apnortheast-a.online.tableau.com/#/site/octopusenergyjapan/home/monthlynewcustomers + owner: + email: "alex.malins@octoenergy.com" + depends_on: + - ref('fnl_sales_newcustomers') + +models: + - name: fnl_sales_newcustomers + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers + description: | + Count of new customers (i.e. ones making their first order) each month. + columns: + - name: year_month + description: Year and month. + tests: + - unique + - not_null + - name: new_customers + description: Number of new customers making first orders that month. 
\ No newline at end of file diff --git a/jaffle_shop/models/final/sales/fnl_sales_newcustomers.sql b/jaffle_shop/models/final/sales/fnl_sales_newcustomers.sql new file mode 100644 index 000000000..0f24f6bb5 --- /dev/null +++ b/jaffle_shop/models/final/sales/fnl_sales_newcustomers.sql @@ -0,0 +1,12 @@ +WITH customer_first_orders AS ( + SELECT + DATE_TRUNC('MONTH', first_order) AS year_month + FROM {{ ref('wh_customers') }} + WHERE first_order IS NOT NULL +) + +SELECT + year_month + , COUNT(1) AS new_customers +FROM customer_first_orders +GROUP BY year_month From ccff4146548ec9e9429546ee7efe033fa5659659 Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Thu, 8 Dec 2022 11:20:53 +0900 Subject: [PATCH 5/7] Remove duplicate docs file --- jaffle_shop/models/_models.md | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 jaffle_shop/models/_models.md diff --git a/jaffle_shop/models/_models.md b/jaffle_shop/models/_models.md deleted file mode 100644 index 93296864a..000000000 --- a/jaffle_shop/models/_models.md +++ /dev/null @@ -1,3 +0,0 @@ -{% docs order_status %} -Current status of the order. Categorical with options 'placed', 'shipped', 'completed', 'return_pending', or 'returned'. 
-{% enddocs %} From 7a039e553a6a472952eedd93e6ff41dfed016c15 Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Wed, 7 Dec 2022 17:07:26 +0900 Subject: [PATCH 6/7] Put customers and orders tables into warehouse --- .../models/{docs.md => warehouse/_docs.md} | 0 .../models/{ => warehouse}/_models.yml | 28 ++++++------------- .../wh_customers.sql} | 4 +-- .../{orders.sql => warehouse/wh_orders.sql} | 0 .../dbt_project_evaluator_exceptions.csv | 2 ++ 5 files changed, 13 insertions(+), 21 deletions(-) rename jaffle_shop/models/{docs.md => warehouse/_docs.md} (100%) rename jaffle_shop/models/{ => warehouse}/_models.yml (88%) rename jaffle_shop/models/{customers.sql => warehouse/wh_customers.sql} (94%) rename jaffle_shop/models/{orders.sql => warehouse/wh_orders.sql} (100%) create mode 100644 jaffle_shop/seeds/dbt_project_evaluator_exceptions.csv diff --git a/jaffle_shop/models/docs.md b/jaffle_shop/models/warehouse/_docs.md similarity index 100% rename from jaffle_shop/models/docs.md rename to jaffle_shop/models/warehouse/_docs.md diff --git a/jaffle_shop/models/_models.yml b/jaffle_shop/models/warehouse/_models.yml similarity index 88% rename from jaffle_shop/models/_models.yml rename to jaffle_shop/models/warehouse/_models.yml index 381349cfd..a7f41e0a8 100644 --- a/jaffle_shop/models/_models.yml +++ b/jaffle_shop/models/warehouse/_models.yml @@ -1,81 +1,71 @@ version: 2 models: - - name: customers + - name: wh_customers + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers description: This table has basic information about a customer, as well as some derived facts based on a customer's orders - columns: - name: customer_id description: This is a unique identifier for a customer tests: - unique - not_null - - name: first_name description: Customer's first name. PII. - - name: last_name description: Customer's last name. PII. 
- - name: first_order description: Date (UTC) of a customer's first order - - name: most_recent_order description: Date (UTC) of a customer's most recent order - - name: number_of_orders description: Count of the number of orders a customer has placed - - name: total_order_amount description: Total value (AUD) of a customer's orders - - name: orders + - name: wh_orders + meta: + owner: "alex.malins@octoenergy.com" + team_owner: '!subteam^S02GPV1135F' #@dbt_gatekeepers description: This table has basic information about orders, as well as some derived facts based on payments - columns: - name: order_id tests: - unique - not_null description: This is a unique identifier for an order - - name: customer_id description: Foreign key to the customers table tests: - not_null - relationships: - to: ref('customers') + to: ref('wh_customers') field: customer_id - - name: order_date description: Date (UTC) that the order was placed - - name: status description: '{{ doc("orders_status") }}' tests: - accepted_values: values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] - - name: amount description: Total amount (AUD) of the order tests: - not_null - - name: credit_card_amount description: Amount of the order (AUD) paid for by credit card tests: - not_null - - name: coupon_amount description: Amount of the order (AUD) paid for by coupon tests: - not_null - - name: bank_transfer_amount description: Amount of the order (AUD) paid for by bank transfer tests: - not_null - - name: gift_card_amount description: Amount of the order (AUD) paid for by gift card tests: diff --git a/jaffle_shop/models/customers.sql b/jaffle_shop/models/warehouse/wh_customers.sql similarity index 94% rename from jaffle_shop/models/customers.sql rename to jaffle_shop/models/warehouse/wh_customers.sql index 016a004fe..dd7c60ef4 100644 --- a/jaffle_shop/models/customers.sql +++ b/jaffle_shop/models/warehouse/wh_customers.sql @@ -49,8 +49,8 @@ final as ( select customers.customer_id, - 
customers.first_name, - customers.last_name, + customers.first_name_hash, + customers.last_name_hash, customer_orders.first_order, customer_orders.most_recent_order, customer_orders.number_of_orders, diff --git a/jaffle_shop/models/orders.sql b/jaffle_shop/models/warehouse/wh_orders.sql similarity index 100% rename from jaffle_shop/models/orders.sql rename to jaffle_shop/models/warehouse/wh_orders.sql diff --git a/jaffle_shop/seeds/dbt_project_evaluator_exceptions.csv b/jaffle_shop/seeds/dbt_project_evaluator_exceptions.csv new file mode 100644 index 000000000..7c69f74e4 --- /dev/null +++ b/jaffle_shop/seeds/dbt_project_evaluator_exceptions.csv @@ -0,0 +1,2 @@ +fct_name,column_name,id_to_exclude,comment + fct_staging_dependent_on_staging,parent,stg_customers_pii,Scrubbing pii permitted in staging layer. \ No newline at end of file From 972e4a3f39b668eaa28fed4d30c96a9374cf34ee Mon Sep 17 00:00:00 2001 From: Alex Malins <22991362+alexmalins@users.noreply.github.com> Date: Thu, 8 Dec 2022 11:40:11 +0900 Subject: [PATCH 7/7] Move sources to pacify pytest --- jaffle_shop/{models/staging/src_seed => }/_sources.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename jaffle_shop/{models/staging/src_seed => }/_sources.yml (100%) diff --git a/jaffle_shop/models/staging/src_seed/_sources.yml b/jaffle_shop/_sources.yml similarity index 100% rename from jaffle_shop/models/staging/src_seed/_sources.yml rename to jaffle_shop/_sources.yml