Skip to content

Commit

Permalink
Create web100 passthrough view for extended web100 views (#138)
Browse files Browse the repository at this point in the history
* Create web100 static passthrough view for extended web100 views
* Rename web100_static to web100 for consistency
  • Loading branch information
stephen-soltesz authored Apr 28, 2022
1 parent 1d19ad9 commit a871c45
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 20 deletions.
4 changes: 2 additions & 2 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ steps:
# Use cbif condition: only run these steps in one of these projects.
- PROJECT_IN=mlab-sandbox,mlab-staging
args:
- /workspace/views/create_dataset_views.sh self $PROJECT_ID $PROJECT_ID
- /workspace/transform/create_static_tables.sh $PROJECT_ID
- /workspace/views/create_dataset_views.sh self $PROJECT_ID $PROJECT_ID

# Deployments to oti and measurement-lab.
- name: gcr.io/$PROJECT_ID/gcloud-jsonnet-cbif
env:
# Use cbif condition: only run these steps in one of these projects.
- PROJECT_IN=mlab-oti
args:
- /workspace/views/create_dataset_views.sh self $PROJECT_ID $PROJECT_ID
- /workspace/transform/create_static_tables.sh $PROJECT_ID
- /workspace/views/create_dataset_views.sh self $PROJECT_ID $PROJECT_ID
- /workspace/views/create_dataset_views.sh self $PROJECT_ID measurement-lab
5 changes: 1 addition & 4 deletions transform/create_static_tables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ cd ${BASEDIR}

# Run the DDL statement in a query file with `bq query` and report the table
# that the statement targets.
#
# Arguments:
#   $1 - path to a .sql file containing a CREATE TABLE statement.
# Globals:
#   PROJECT - project id passed to bq and used in the status message.
# Returns:
#   The exit status of `bq query` when it fails; otherwise that of echo.
function create_table() {
    local query_file=${1:?Please provide query file}
    # Declared separately from the assignment so that `local` does not mask
    # the exit status of the command substitution.
    local table
    # The table name is the third field of the CREATE TABLE line, unless the
    # optional "IF NOT EXISTS" clause is present, in which case it is the
    # sixth. Only the first matching statement is reported.
    table=$( awk '/^[[:space:]]*CREATE TABLE/ {
        i = 3
        if ($3 == "IF" && $4 == "NOT" && $5 == "EXISTS") i = 6
        print $i
        exit
    }' "$query_file" )

    # Do not claim success when the query failed.
    bq query --project_id="$PROJECT" --nouse_legacy_sql "$( cat "$query_file" )" || return
    echo "Created table $PROJECT.$table successfully"
}

create_table ./web100_static.sql
create_table ./web100.sql
2 changes: 1 addition & 1 deletion transform/web100_static.sql → transform/web100.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
-- for queries.
--
-- Always create within local project.
CREATE TABLE IF NOT EXISTS ndt.web100_static
CREATE TABLE IF NOT EXISTS ndt.web100
PARTITION BY date
OPTIONS (
require_partition_filter=true
Expand Down
23 changes: 12 additions & 11 deletions views/create_dataset_views.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ create_view ${SRC_PROJECT} ${DST_PROJECT} ndt_raw ./ndt_raw/hopannotation1.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt_raw ./ndt_raw/scamper1.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt_raw ./ndt_raw/tcpinfo.sql

# Public pass-through views for joined tables.
if [[ ${DST_PROJECT} = "measurement-lab" ]] ; then
# NOTE: these steps can only be applied in the public measurement-lab
# project because in other M-Lab projects, these targets are actual
# tables. Only in measurement-lab can we create these views.
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/ndt5.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/ndt7.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/tcpinfo.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/scamper1.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/web100.sql
fi

# NDT extended (mixed parsers)
create_view ${DST_PROJECT} ${DST_PROJECT} ndt_intermediate ./ndt_intermediate/extended_ndt5_downloads.sql
create_view ${DST_PROJECT} ${DST_PROJECT} ndt_intermediate ./ndt_intermediate/extended_ndt5_uploads.sql
Expand All @@ -103,17 +115,6 @@ create_view ${DST_PROJECT} ${DST_PROJECT} ndt ./ndt/unified_uploads_20201026x.sq
create_view ${DST_PROJECT} ${DST_PROJECT} ndt ./ndt/unified_uploads.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/scamper1_hopannotation1.sql

# Public pass-through views for joined tables.
if [[ ${DST_PROJECT} = "measurement-lab" ]] ; then
# NOTE: these steps can only be applied in the public measurement-lab
# project because in other M-Lab projects, these targets are actual
# tables. Only in measurement-lab can we create these views.
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/ndt5.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/ndt7.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/tcpinfo.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} ndt ./ndt/scamper1.sql
fi

# traceroute.
create_view ${SRC_PROJECT} ${DST_PROJECT} traceroute ./traceroute/scamper1.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} traceroute ./traceroute/paris1_legacy.sql
Expand Down
9 changes: 9 additions & 0 deletions views/ndt/web100.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
--
-- Pass-through view over the date-partitioned ndt web100 data.
--
-- The underlying table is a static transformation of data from the v1 data
-- pipeline for the ndt web100 dataset. It is "static" because it is not
-- actively reprocessed. While it uses standard column conventions, the schema
-- is not guaranteed to be backward compatible because there is currently no
-- parser that supports reprocessing this format.
--
SELECT * FROM `{{.ProjectID}}.ndt.web100`
2 changes: 1 addition & 1 deletion views/ndt_intermediate/extended_web100_downloads.sql
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ WITH PreCleanWeb100 AS (
parser.ArchiveURL,
parser.Filename
) AS Web100parser,
FROM `{{.ProjectID}}.ndt.web100_static`
FROM `{{.ProjectID}}.ndt.web100`
WHERE
raw.web100.snap.Duration IS NOT NULL
AND raw.web100.snap.State IS NOT NULL
Expand Down
2 changes: 1 addition & 1 deletion views/ndt_intermediate/extended_web100_uploads.sql
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ WITH PreCleanWeb100 AS (
parser.ArchiveURL,
parser.Filename
) AS Web100parser,
FROM `{{.ProjectID}}.ndt.web100_static`
FROM `{{.ProjectID}}.ndt.web100`
WHERE
raw.web100.snap.Duration IS NOT NULL
AND raw.web100.snap.State IS NOT NULL
Expand Down

0 comments on commit a871c45

Please sign in to comment.