Skip to content

Commit

Permalink
run benchmarks nightly (#224)
Browse files Browse the repository at this point in the history
  • Loading branch information
ruslandoga authored Dec 20, 2024
1 parent e8a4431 commit ba2b1d8
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 173 deletions.
17 changes: 9 additions & 8 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: bench

on: workflow_dispatch
on:
workflow_dispatch:
schedule:
- cron: "42 9 * * *"

jobs:
benchee:
Expand Down Expand Up @@ -41,10 +44,8 @@ jobs:
- run: mix deps.get --only $MIX_ENV
- run: mix compile --warnings-as-errors
- run: mkdir results
- run: mix run bench/insert.exs | tee results/insert.txt
- run: mix run bench/stream.exs | tee results/stream.txt
- uses: actions/upload-artifact@v4
with:
name: results
path: results/*.txt

# - run: mix run bench/cast.exs
- run: mix run bench/encode.exs
- run: mix run bench/insert.exs
- run: mix run bench/stream.exs
121 changes: 3 additions & 118 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,121 +270,6 @@ taipei = DateTime.shift_zone!(utc, "Asia/Taipei")
Ch.query!(pid, "INSERT INTO ch_datetimes(datetime) FORMAT RowBinary", [[naive], [utc], [taipei]], types: ["DateTime"])
```

## Benchmarks

<details>
<summary><code>INSERT</code> 1 million rows <a href="https://github.com/ClickHouse/clickhouse-go#benchmark">(original)</a></summary>

<pre><code>
$ MIX_ENV=bench mix run bench/insert.exs

This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark

Operating System: macOS
CPU Information: Apple M1
Number of Available Cores: 8
Available memory: 8 GB
Elixir 1.14.4
Erlang 25.3

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 1_000_000 rows
Estimated total run time: 28 s

Benchmarking encode with input 1_000_000 rows ...
Benchmarking encode stream with input 1_000_000 rows ...
Benchmarking insert with input 1_000_000 rows ...
Benchmarking insert stream with input 1_000_000 rows ...

##### With input 1_000_000 rows #####
Name ips average deviation median 99th %
encode stream 1.63 612.96 ms ±11.30% 583.03 ms 773.01 ms
insert stream 1.22 819.82 ms ±9.41% 798.94 ms 973.45 ms
encode 1.09 915.75 ms ±44.13% 750.98 ms 1637.02 ms
insert 0.73 1373.84 ms ±31.01% 1331.86 ms 1915.76 ms

Comparison:
encode stream 1.63
insert stream 1.22 - 1.34x slower +206.87 ms
encode 1.09 - 1.49x slower +302.79 ms
insert 0.73 - 2.24x slower +760.88 ms</code>
</pre>

</details>

<details>
<summary><code>SELECT</code> 500, 500 thousand, and 500 million rows <a href="https://github.com/ClickHouse/ch-bench">(original)</a></summary>

<pre><code>
$ MIX_ENV=bench mix run bench/stream.exs

This benchmark is based on https://github.com/ClickHouse/ch-bench

Operating System: macOS
CPU Information: Apple M1
Number of Available Cores: 8
Available memory: 8 GB
Elixir 1.14.4
Erlang 25.3

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 500 rows, 500_000 rows, 500_000_000 rows
Estimated total run time: 1.05 min

Benchmarking stream with decode with input 500 rows ...
Benchmarking stream with decode with input 500_000 rows ...
Benchmarking stream with decode with input 500_000_000 rows ...
Benchmarking stream with manual decode with input 500 rows ...
Benchmarking stream with manual decode with input 500_000 rows ...
Benchmarking stream with manual decode with input 500_000_000 rows ...
Benchmarking stream without decode with input 500 rows ...
Benchmarking stream without decode with input 500_000 rows ...
Benchmarking stream without decode with input 500_000_000 rows ...

##### With input 500 rows #####
Name ips average deviation median 99th %
stream with decode 4.69 K 213.34 μs ±12.49% 211.38 μs 290.94 μs
stream with manual decode 4.69 K 213.43 μs ±17.40% 210.96 μs 298.75 μs
stream without decode 4.65 K 215.08 μs ±10.79% 213.79 μs 284.66 μs

Comparison:
stream with decode 4.69 K
stream with manual decode 4.69 K - 1.00x slower +0.0838 μs
stream without decode 4.65 K - 1.01x slower +1.74 μs

##### With input 500_000 rows #####
Name ips average deviation median 99th %
stream without decode 234.58 4.26 ms ±13.99% 4.04 ms 5.95 ms
stream with manual decode 64.26 15.56 ms ±8.36% 15.86 ms 17.97 ms
stream with decode 41.03 24.37 ms ±6.27% 24.39 ms 26.60 ms

Comparison:
stream without decode 234.58
stream with manual decode 64.26 - 3.65x slower +11.30 ms
stream with decode 41.03 - 5.72x slower +20.11 ms

##### With input 500_000_000 rows #####
Name ips average deviation median 99th %
stream without decode 0.32 3.17 s ±0.20% 3.17 s 3.17 s
stream with manual decode 0.0891 11.23 s ±0.00% 11.23 s 11.23 s
stream with decode 0.0462 21.66 s ±0.00% 21.66 s 21.66 s

Comparison:
stream without decode 0.32
stream with manual decode 0.0891 - 3.55x slower +8.06 s
stream with decode 0.0462 - 6.84x slower +18.50 s</code>
</pre>

</details>

[CI Results](https://github.com/plausible/ch/actions/workflows/bench.yml) (click the latest workflow run and scroll down to "Artifacts")
## [Benchmarks](./bench)

See nightly [CI runs](https://github.com/plausible/ch/actions/workflows/bench.yml) for latest results.
27 changes: 27 additions & 0 deletions bench/encode.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Benchmark: RowBinary encoding throughput.
# Compares encoding 1M rows in one pass vs. encoding in 60k-row streamed chunks.
# Run with: MIX_ENV=bench mix run bench/encode.exs
IO.puts("""
This benchmark measures the performance of encoding rows in RowBinary format.
""")

alias Ch.RowBinary

# ClickHouse column types for each element of a row, positionally.
types = ["UInt64", "String", "Array(UInt8)", "DateTime"]

# Builds `count` sample rows matching `types`:
# [UInt64 id, String, Array(UInt8), DateTime].
# NOTE(review): DateTime.utc_now/0 makes row data differ between runs;
# presumably acceptable for an encoding benchmark — confirm determinism is not required.
rows = fn count ->
Enum.map(1..count, fn i ->
[i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()]
end)
end

Benchee.run(
%{
# Encode the full input in a single call.
"RowBinary" => fn rows -> RowBinary.encode_rows(rows, types) end,
# Encode lazily in 60_000-row chunks; Stream.each discards the encoded
# iodata, so only encoding cost (not accumulation) is measured.
"RowBinary stream" => fn rows ->
Stream.chunk_every(rows, 60_000)
|> Stream.each(fn chunk -> RowBinary.encode_rows(chunk, types) end)
|> Stream.run()
end
},
inputs: %{
"1_000_000 (UInt64, String, Array(UInt8), DateTime) rows" => rows.(1_000_000)
}
)
69 changes: 37 additions & 32 deletions bench/insert.exs
Original file line number Diff line number Diff line change
@@ -1,54 +1,59 @@
IO.puts("This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark\n")
IO.puts("""
This benchmark is based on https://github.com/ClickHouse/clickhouse-go#benchmark
It tests how quickly a client can insert one million rows of the following schema:
- col1 UInt64
- col2 String
- col3 Array(UInt8)
- col4 DateTime
""")

port = String.to_integer(System.get_env("CH_PORT") || "8123")
hostname = System.get_env("CH_HOSTNAME") || "localhost"
scheme = System.get_env("CH_SCHEME") || "http"
database = System.get_env("CH_DATABASE") || "ch_bench"

{:ok, conn} = Ch.start_link(scheme: scheme, hostname: hostname, port: port)
Ch.query!(conn, "CREATE DATABASE IF NOT EXISTS {$0:Identifier}", [database])

Ch.query!(conn, """
CREATE TABLE IF NOT EXISTS #{database}.benchmark (
col1 UInt64,
col2 String,
col3 Array(UInt8),
col4 DateTime
) Engine Null
""")

types = [Ch.Types.u64(), Ch.Types.string(), Ch.Types.array(Ch.Types.u8()), Ch.Types.datetime()]
statement = "INSERT INTO #{database}.benchmark FORMAT RowBinary"
alias Ch.RowBinary

rows = fn count ->
Enum.map(1..count, fn i ->
[i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], NaiveDateTime.utc_now()]
[i, "Golang SQL database driver", [1, 2, 3, 4, 5, 6, 7, 8, 9], DateTime.utc_now()]
end)
end

alias Ch.RowBinary
statement = "INSERT INTO #{database}.benchmark FORMAT RowBinary"
types = ["UInt64", "String", "Array(UInt8)", "DateTime"]

Benchee.run(
%{
# "control" => fn rows -> Enum.each(rows, fn _row -> :ok end) end,
"encode" => fn rows -> RowBinary.encode_rows(rows, types) end,
"insert" => fn rows -> Ch.query!(conn, statement, rows, types: types) end,
# "control stream" => fn rows -> rows |> Stream.chunk_every(60_000) |> Stream.run() end,
"encode stream" => fn rows ->
rows
|> Stream.chunk_every(60_000)
|> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end)
|> Stream.run()
"Ch.query" => fn %{pool: pool, rows: rows} ->
Ch.query!(pool, statement, rows, types: types)
end,
"insert stream" => fn rows ->
stream =
rows
|> Stream.chunk_every(60_000)
"Ch.stream" => fn %{pool: pool, rows: rows} ->
DBConnection.run(pool, fn conn ->
Stream.chunk_every(rows, 100_000)
|> Stream.map(fn chunk -> RowBinary.encode_rows(chunk, types) end)

Ch.query!(conn, statement, stream, encode: false)
|> Stream.into(Ch.stream(conn, statement, [], encode: false))
|> Stream.run()
end)
end
},
before_scenario: fn rows ->
{:ok, pool} = Ch.start_link(scheme: scheme, hostname: hostname, port: port, pool_size: 1)

Ch.query!(pool, "CREATE DATABASE IF NOT EXISTS {$0:Identifier}", [database])

Ch.query!(pool, """
CREATE TABLE IF NOT EXISTS #{database}.benchmark (
col1 UInt64,
col2 String,
col3 Array(UInt8),
col4 DateTime
) Engine Null
""")

%{pool: pool, rows: rows}
end,
inputs: %{
"1_000_000 rows" => rows.(1_000_000)
}
Expand Down
46 changes: 31 additions & 15 deletions bench/stream.exs
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
IO.puts("This benchmark is based on https://github.com/ClickHouse/ch-bench\n")
IO.puts("""
This benchmark is based on https://github.com/ClickHouse/ch-bench
It tests how quickly a client can select N rows from the system.numbers_mt table:
SELECT number FROM system.numbers_mt LIMIT {limit:UInt64} FORMAT RowBinary
""")

port = String.to_integer(System.get_env("CH_PORT") || "8123")
hostname = System.get_env("CH_HOSTNAME") || "localhost"
scheme = System.get_env("CH_SCHEME") || "http"

{:ok, conn} = Ch.start_link(scheme: scheme, hostname: hostname, port: port)
limits = fn limits ->
Map.new(limits, fn limit ->
{"limit=#{limit}", limit}
end)
end

Benchee.run(
%{
"RowBinary stream without decode" => fn limit ->
# "Ch.query" => fn %{pool: pool, limit: limit} ->
# Ch.query!(
# pool,
# "SELECT number FROM system.numbers_mt LIMIT {limit:UInt64}",
# %{"limit" => limit},
# timeout: :infinity
# )
# end,
"Ch.stream w/o decoding (i.e. pass-through)" => fn %{pool: pool, limit: limit} ->
DBConnection.run(
conn,
pool,
fn conn ->
conn
|> Ch.stream(
Expand All @@ -22,29 +40,27 @@ Benchee.run(
timeout: :infinity
)
end,
"RowBinary stream with manual decode" => fn limit ->
"Ch.stream with manual RowBinary decoding" => fn %{pool: pool, limit: limit} ->
DBConnection.run(
conn,
pool,
fn conn ->
conn
|> Ch.stream(
"SELECT number FROM system.numbers_mt LIMIT {limit:UInt64} FORMAT RowBinary",
%{"limit" => limit}
)
|> Stream.map(fn %Ch.Result{data: data} ->
data
|> IO.iodata_to_binary()
|> Ch.RowBinary.decode_rows([:u64])
|> Stream.each(fn %Ch.Result{data: data} ->
data |> IO.iodata_to_binary() |> Ch.RowBinary.decode_rows([:u64])
end)
|> Stream.run()
end,
timeout: :infinity
)
end
},
inputs: %{
"500 rows" => 500,
"500_000 rows" => 500_000,
"500_000_000 rows" => 500_000_000
}
before_scenario: fn limit ->
{:ok, pool} = Ch.start_link(scheme: scheme, hostname: hostname, port: port, pool_size: 1)
%{pool: pool, limit: limit}
end,
inputs: limits.([500, 500_000, 500_000_000])
)

0 comments on commit ba2b1d8

Please sign in to comment.