-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmaster_script.sh
executable file
·164 lines (139 loc) · 5.58 KB
/
master_script.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/bin/bash
#
# Runs the whole experiment pipeline in one go: pulls the Docker images,
# generates the dataset, runs the baseline systems and produces the plots
# for Figures 7 through 11.
#
# Usage: ./master_script.sh <data_dir> <scale> <partitions> <num_runs>
# SIGINT handler: announces the interruption, signals the parent process and
# then ends this script with exit status 1.
# NOTE(review): $PPID is the process that launched this shell, so the signal
# goes to the caller (e.g. a wrapper script) -- confirm that this is intended.
int_handler() {
  printf '%s\n' "Interrupted."
  # No signal is named, so `kill` sends its default signal to the parent.
  kill "${PPID}"
  exit 1
}
# Route Ctrl-C (SIGINT) through int_handler.
trap 'int_handler' INT

# Exactly four positional arguments are required. A wrong count is a usage
# error: report it on stderr and exit non-zero so callers can detect it
# (a bare `exit` here would return the status of the preceding echo, i.e. 0).
if [ "$#" -ne 4 ]; then
  echo "Expected 4 arguments, $# given." >&2
  echo "Usage: ./master_script.sh <data_dir> <scale> <partitions> <num_runs>" >&2
  exit 1
fi

DATA_DIR=$1   # host directory that is bind-mounted into the containers
SCALE=$2      # forwarded to the data generator; also used in scale=<...> paths
PARTITION=$3  # forwarded to the data generator; also used in partition=<...> paths
NUM_RUNS=$4   # forwarded to the experiment and plotting scripts

# Quoted so a data directory containing spaces or glob characters is created
# as a single path.
# NOTE(review): the path is used verbatim as a `docker run -v` source below;
# presumably it should be absolute -- confirm against the Docker version in use.
mkdir -p -- "${DATA_DIR}"
# Trace every command from here on; tracing is switched off again at the end
# of the script.
set -x
echo "[*] Pulling Docker images"
# The images are published as supawit2/deepola-<component>:sigmod2023 but the
# rest of the script refers to them by the short local name
# deepola-<component>:sigmod2023, so each image is pulled and then re-tagged.
# The component list is written once so the pull and the tag cannot drift apart.
for component in data polars wanderjoin wake viz; do
  docker pull "supawit2/deepola-${component}:sigmod2023"
  docker image tag "supawit2/deepola-${component}:sigmod2023" "deepola-${component}:sigmod2023"
done
echo "[*] Generating Dataset"
# All four steps run in the deepola-data image with DATA_DIR bind-mounted at
# /dataset/tpch. Every expansion is quoted so that a data directory containing
# whitespace or glob characters reaches docker as one argument.
# NOTE(review): no step checks the exit status of the previous one, so a failed
# generation is followed by conversions of missing data -- confirm whether the
# script should stop on the first failure.

# Generate TPC-H Dataset
docker run --rm \
  -v "${DATA_DIR}:/dataset/tpch:rw" \
  --name dataset deepola-data:sigmod2023 \
  bash data-gen.sh "${SCALE}" "${PARTITION}" /dataset/tpch

# Convert to Parquet
docker run --rm \
  -v "${DATA_DIR}:/dataset/tpch:rw" \
  --name dataset deepola-data:sigmod2023 \
  python3 convert-to-parquet.py "/dataset/tpch/scale=${SCALE}/partition=${PARTITION}/tbl"

# Generate Cleaned Table Dataset
docker run --rm \
  -v "${DATA_DIR}:/dataset/tpch:rw" \
  --name dataset deepola-data:sigmod2023 \
  python3 clean-data.py /dataset/tpch "${SCALE}" "${PARTITION}"

# Convert the cleaned tables to Parquet
docker run --rm \
  -v "${DATA_DIR}:/dataset/tpch:rw" \
  --name dataset deepola-data:sigmod2023 \
  python3 convert-to-parquet.py "/dataset/tpch/scale=${SCALE}/partition=${PARTITION}/cleaned-tbl"
echo "[*] Figure 7: Existing Exact Baselines"
# Running Postgres (directly on the host via the scripts in baselines/postgres,
# not in a container).
# The three directories are exported, so they are also visible in the
# environment of the scripts started below.
export QUERY_DIR=./resources/tpc-h/queries
export POSTGRES_DIR="./tmp/postgres/scale=${SCALE}/partition=${PARTITION}"
export OUTPUT_DIR="./results/postgres/scale=${SCALE}/"

# Expansions are quoted so paths with whitespace stay single arguments.
./baselines/postgres/experiment-setup.sh "${DATA_DIR}" "${QUERY_DIR}" "${POSTGRES_DIR}" "${SCALE}" "${PARTITION}"
# NOTE(review): the trailing "1 1 22" is passed through unchanged; presumably
# the last two values select queries 1..22 -- confirm against experiment-time.sh.
./baselines/postgres/experiment-time.sh "${QUERY_DIR}" "${OUTPUT_DIR}" "${POSTGRES_DIR}" "${SCALE}" "${PARTITION}" "${NUM_RUNS}" 1 1 22
# The redirect target is quoted: unquoted, a path with whitespace makes the
# shell reject the redirection as ambiguous.
python3 baselines/postgres/extract-time.py "${OUTPUT_DIR}" "${SCALE}" "${PARTITION}" "${NUM_RUNS}" 1 1 22 > "${OUTPUT_DIR}/timings.csv"
# Running Polars
# DATA_DIR is mounted at /dataset/tpch and ./results/polars at /results/polars.
# The -v arguments are quoted and use $(pwd) instead of backticks so that a
# working directory or data directory with whitespace is passed intact.
docker run --rm \
  -v "${DATA_DIR}:/dataset/tpch:rw" \
  -v "$(pwd)/results/polars:/results/polars" \
  --name polars deepola-polars:sigmod2023 \
  bash experiment.sh /dataset/tpch /results/polars "${SCALE}" "${PARTITION}" "${NUM_RUNS}" 1 1 22

# Running WAKE
# DATA_DIR is mounted at /dataset and ./results/wake at /saved-outputs.
# NOTE(review): the trailing "0 1 22" is passed through unchanged; presumably
# the last two values select queries 1..22 -- confirm against the WAKE script.
docker run --rm \
  -v "${DATA_DIR}:/dataset:rw" \
  -v "$(pwd)/results/wake:/saved-outputs:rw" \
  --name wake deepola-wake:sigmod2023 \
  bash scripts/experiment_wake_tpch.sh /dataset "${SCALE}" "${PARTITION}" "${NUM_RUNS}" 0 1 22
# Visualizing the results to obtain Figure 7.
# The wake, polars and postgres result directories plus the viz output
# directory are mounted under /results in the viz container. Mount arguments
# are quoted and use $(pwd) so a working directory with whitespace is handled.
docker run --rm \
  -v "$(pwd)/results/wake:/results/wake:rw" \
  -v "$(pwd)/results/polars:/results/polars:rw" \
  -v "$(pwd)/results/postgres:/results/postgres:rw" \
  -v "$(pwd)/results/viz:/results/viz:rw" \
  --name viz deepola-viz:sigmod2023 \
  python3 scripts/plot_tpch.py "${SCALE}" "${PARTITION}" "${NUM_RUNS}"

# Visualizing the results to obtain Figure 8.
# Same container, but only the wake, polars and viz directories are mounted.
docker run --rm \
  -v "$(pwd)/results/wake:/results/wake:rw" \
  -v "$(pwd)/results/polars:/results/polars:rw" \
  -v "$(pwd)/results/viz:/results/viz:rw" \
  --name viz deepola-viz:sigmod2023 \
  python3 scripts/plot_tpch_error.py "${SCALE}" "${PARTITION}" "${NUM_RUNS}"
echo "[*] Figure 9: Existing OLA Baselines"
# All mount arguments below are quoted and use $(pwd) instead of backticks so
# that paths containing whitespace are passed to docker as single arguments.

# Running Wake
# NOTE(review): the trailing "0 23 27" is passed through unchanged; presumably
# the last two values select queries 23..27 -- confirm against the WAKE script.
docker run --rm \
  -v "${DATA_DIR}:/dataset:rw" \
  -v "$(pwd)/results/wake:/saved-outputs:rw" \
  --name wake deepola-wake:sigmod2023 \
  bash scripts/experiment_wake_tpch.sh /dataset "${SCALE}" "${PARTITION}" "${NUM_RUNS}" 0 23 27

# Running Wanderjoin
# DATA_DIR is mounted at /wanderjoin/tpch and ./results/wanderjoin at
# /wanderjoin/outputs; the script receives the relative names tpch, queries
# and outputs.
docker run --rm \
  -v "${DATA_DIR}:/wanderjoin/tpch:rw" \
  -v "$(pwd)/results/wanderjoin:/wanderjoin/outputs:rw" \
  --name wanderjoin deepola-wanderjoin:sigmod2023 \
  bash experiment.sh tpch queries outputs "${SCALE}" "${PARTITION}" "${NUM_RUNS}" 1 23 25

# Visualizing results
docker run --rm \
  -v "$(pwd)/results/wake:/results/wake:rw" \
  -v "$(pwd)/results/wanderjoin:/results/wanderjoin:rw" \
  -v "$(pwd)/results/viz:/results/viz:rw" \
  -v "$(pwd)/results/polars:/results/polars:rw" \
  --name viz deepola-viz:sigmod2023 \
  python3 scripts/plot_tpch_ola.py "${SCALE}" "${PARTITION}" "${NUM_RUNS}"
echo "[*] Figure 10: Confidence Interval"
# Mount arguments are quoted and use $(pwd) instead of backticks so that paths
# containing whitespace are passed to docker as single arguments.
# Both commands receive the literal value 100 rather than NUM_RUNS.

# Running Wake
docker run --rm \
  -v "${DATA_DIR}:/dataset:rw" \
  -v "$(pwd)/results/wake:/saved-outputs:rw" \
  --name wake deepola-wake:sigmod2023 \
  bash scripts/experiment_wake_ci.sh /dataset "${SCALE}" "${PARTITION}" 100 0

# Visualizing Results
docker run --rm \
  -v "$(pwd)/results/wake:/results/wake:rw" \
  -v "$(pwd)/results/viz:/results/viz:rw" \
  --name viz deepola-viz:sigmod2023 \
  python3 scripts/plot_ci.py "${SCALE}" "${PARTITION}" 100
echo "[*] Figure 11: Query Depth Experiment"
# Mount arguments are quoted and use $(pwd) instead of backticks so that paths
# containing whitespace are passed to docker as single arguments.
# This experiment uses fixed literal parameters; SCALE, PARTITION and NUM_RUNS
# are not used here.

# Generate Dataset
# The output goes to /dataset/g10_p1m_n100_c4, i.e. into DATA_DIR on the host.
# NOTE(review): the directory name appears to encode the arguments
# 10 / 1000000 / 100 / 4 -- confirm their meaning in deep_data_gen.py.
docker run --rm \
  -v "${DATA_DIR}:/dataset:rw" \
  -v "$(pwd)/results/wake:/saved-outputs:rw" \
  --name wake deepola-wake:sigmod2023 \
  python scripts/deep_data_gen.py 10 1000000 100 4 /dataset/g10_p1m_n100_c4

# Running Wake
docker run --rm \
  -v "${DATA_DIR}:/dataset:rw" \
  -v "$(pwd)/results/wake:/saved-outputs:rw" \
  --name wake deepola-wake:sigmod2023 \
  bash scripts/experiment_wake_depth.sh /dataset 10 0

# Visualizing Results
docker run --rm \
  -v "$(pwd)/results/wake:/results/wake:rw" \
  -v "$(pwd)/results/viz:/results/viz:rw" \
  --name viz deepola-viz:sigmod2023 \
  python3 scripts/plot_depth.py 10

# Stop command tracing.
set +x