From 4a8467090f90d2ccd2a3922fff773bacc1dd25a9 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 5 Jun 2024 11:30:45 -0400 Subject: [PATCH] fix benchmarks --- .github/workflows/test.yml | 76 +++++++++--------- scripts/gossip_test.sh | 6 +- src/accountsdb/db.zig | 157 ++++++++++++++++++++++--------------- src/cmd/cmd.zig | 4 +- src/gossip/service.zig | 16 ++-- src/net/socket_utils.zig | 25 +++--- 6 files changed, 160 insertions(+), 124 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 345c42bb8..b24e094c2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,7 @@ jobs: zig fmt --check src/ zig fmt --check build.zig - build-unix: + test: strategy: matrix: os: [ubuntu-latest] @@ -42,41 +42,47 @@ jobs: with: version: 0.12.0 - - name: build - run: zig build - - name: test run: zig build test - - name: gossip - run: bash scripts/gossip_test.sh 120 # in seconds + benchmarks: + strategy: + matrix: + os: [ubuntu-latest] + + runs-on: ${{matrix.os}} + + steps: + - name: checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: setup-zig + uses: goto-bus-stop/setup-zig@v1 + with: + version: 0.12.0 + + - name: benchmarks + run: zig build benchmark - # build-windows: - # runs-on: windows-latest - # defaults: - # run: - # shell: bash - - # steps: - # - name: setup-git - # run: | - # git config --global core.autocrlf false - # git config --global core.eol lf - - # - name: checkout - # uses: actions/checkout@v2 - # with: - # submodules: recursive - - # - name: setup-zig - # uses: goto-bus-stop/setup-zig@v1 - # with: - # version: master - - # - name: build - # run: | - # zig build -Dtarget=x86_64-windows --prefix tmp - # zig build - - # - name: test - # run: zig build test + gossip: + strategy: + matrix: + os: [ubuntu-latest] + + runs-on: ${{matrix.os}} + + steps: + - name: checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: setup-zig + uses: goto-bus-stop/setup-zig@v1 + with: + version: 0.12.0 + + - name: gossip + run: bash scripts/gossip_test.sh 120 # in seconds diff --git a/scripts/gossip_test.sh b/scripts/gossip_test.sh index ab111d1e7..063606992 100644 --- a/scripts/gossip_test.sh +++ b/scripts/gossip_test.sh @@ -2,8 +2,10 @@ echo "Running gossip test for $1 seconds" # build and run gossip -zig build -./zig-out/bin/sig gossip -e entrypoint.testnet.solana.com:8001 -e entrypoint2.testnet.solana.com:8001 & +zig build -Doptimize=ReleaseSafe && \ +./zig-out/bin/sig gossip \ + -e entrypoint.testnet.solana.com:8001 \ + -e entrypoint2.testnet.solana.com:8001 & # Get the process ID of the last background command PID=$! diff --git a/src/accountsdb/db.zig b/src/accountsdb/db.zig index ce889dc54..9eea6ec3b 100644 --- a/src/accountsdb/db.zig +++ b/src/accountsdb/db.zig @@ -1249,7 +1249,6 @@ pub const BenchmarkAccountsDB = struct { }; pub const args = [_]BenchArgs{ - // test accounts in ram BenchArgs{ .n_accounts = 100_000, .slot_list_len = 1, @@ -1258,81 +1257,111 @@ pub const BenchmarkAccountsDB = struct { .name = "100k accounts (1_slot - ram index - ram accounts)", }, BenchArgs{ - .n_accounts = 10_000, - .slot_list_len = 10, - .accounts = .ram, - .index = .ram, - .name = "10k accounts (10_slots - ram index - ram accounts)", - }, - - // tests large number of accounts on disk - BenchArgs{ - .n_accounts = 10_000, - .slot_list_len = 10, - .accounts = .disk, - .index = .ram, - .name = "10k accounts (10_slots - ram index - disk accounts)", - }, - BenchArgs{ - .n_accounts = 500_000, - .slot_list_len = 1, - .accounts = .disk, - .index = .ram, - .name = "500k accounts (1_slot - ram index - disk accounts)", - }, - BenchArgs{ - .n_accounts = 500_000, - .slot_list_len = 3, - .accounts = .disk, - .index = .ram, - .name = "500k accounts (3_slot - ram index - disk accounts)", - }, - BenchArgs{ - .n_accounts = 3_000_000, - .slot_list_len = 1, - .accounts = .disk, - .index = .ram, - .name = "3M accounts (1_slot - ram index - disk accounts)", - }, - BenchArgs{ - .n_accounts = 3_000_000, - .slot_list_len = 3, - .accounts = .disk, - .index = .ram, - .name = "3M accounts (3_slot - ram index - disk accounts)", - }, - BenchArgs{ - .n_accounts = 500_000, - .slot_list_len = 1, - .accounts = .disk, - .n_accounts_multiple = 2, // 1 mill accounts init - .index = .ram, - .name = "3M accounts (3_slot - ram index - disk accounts)", - }, - - // testing disk indexes - BenchArgs{ - .n_accounts = 500_000, + .n_accounts = 100_000, .slot_list_len = 1, - .accounts = .disk, + .accounts = .ram, .index = .disk, - .name = "500k accounts (1_slot - disk index - disk accounts)", + .name = "100k accounts (1_slot - disk index - ram accounts)", }, BenchArgs{ - .n_accounts = 3_000_000, + .n_accounts = 100_000, .slot_list_len = 1, .accounts = .disk, - .index = .disk, - .name = "3m accounts (1_slot - disk index - disk accounts)", + .index = .ram, + .name = "100k accounts (1_slot - ram index - disk accounts)", }, BenchArgs{ - .n_accounts = 500_000, + .n_accounts = 100_000, .slot_list_len = 1, .accounts = .disk, .index = .disk, - .n_accounts_multiple = 2, - .name = "500k accounts (1_slot - disk index - disk accounts)", + .name = "100k accounts (1_slot - disk index - disk accounts)", }, + + // // test accounts in ram + // BenchArgs{ + // .n_accounts = 100_000, + // .slot_list_len = 1, + // .accounts = .ram, + // .index = .ram, + // .name = "100k accounts (1_slot - ram index - ram accounts)", + // }, + // BenchArgs{ + // .n_accounts = 10_000, + // .slot_list_len = 10, + // .accounts = .ram, + // .index = .ram, + // .name = "10k accounts (10_slots - ram index - ram accounts)", + // }, + + // // tests large number of accounts on disk + // BenchArgs{ + // .n_accounts = 10_000, + // .slot_list_len = 10, + // .accounts = .disk, + // .index = .ram, + // .name = "10k accounts (10_slots - ram index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 500_000, + // .slot_list_len = 1, + // .accounts = .disk, + // .index = .ram, + // .name = "500k accounts (1_slot - ram index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 500_000, + // .slot_list_len = 3, + // .accounts = .disk, + // .index = .ram, + // .name = "500k accounts (3_slot - ram index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 3_000_000, + // .slot_list_len = 1, + // .accounts = .disk, + // .index = .ram, + // .name = "3M accounts (1_slot - ram index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 3_000_000, + // .slot_list_len = 3, + // .accounts = .disk, + // .index = .ram, + // .name = "3M accounts (3_slot - ram index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 500_000, + // .slot_list_len = 1, + // .accounts = .disk, + // .n_accounts_multiple = 2, // 1 mill accounts init + // .index = .ram, + // .name = "3M accounts (3_slot - ram index - disk accounts)", + // }, + + // // testing disk indexes + // BenchArgs{ + // .n_accounts = 500_000, + // .slot_list_len = 1, + // .accounts = .disk, + // .index = .disk, + // .name = "500k accounts (1_slot - disk index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 3_000_000, + // .slot_list_len = 1, + // .accounts = .disk, + // .index = .disk, + // .name = "3m accounts (1_slot - disk index - disk accounts)", + // }, + // BenchArgs{ + // .n_accounts = 500_000, + // .slot_list_len = 1, + // .accounts = .disk, + // .index = .disk, + // .n_accounts_multiple = 2, + // .name = "500k accounts (1_slot - disk index - disk accounts)", + // }, }; pub fn readAccounts(bench_args: BenchArgs) !u64 { diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 500cb2224..60834d79e 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -39,7 +39,7 @@ const requestIpEcho = @import("../net/echo.zig").requestIpEcho; const servePrometheus = @import("../prometheus/http.zig").servePrometheus; const parallelUnpackZstdTarBall = @import("../accountsdb/snapshots.zig").parallelUnpackZstdTarBall; const downloadSnapshotsFromGossip = @import("../accountsdb/download.zig").downloadSnapshotsFromGossip; -const SOCKET_TIMEOUT_US = @import("../net/socket_utils.zig").SOCKET_TIMEOUT_US; +const SOCKET_TIMEOUT_MS = @import("../net/socket_utils.zig").SOCKET_TIMEOUT_MS; const config = @import("config.zig"); // var validator_config = config.current; @@ -418,7 +418,7 @@ fn validator() !void { // repair var repair_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); try repair_socket.bindToPort(repair_port); - try repair_socket.setReadTimeout(SOCKET_TIMEOUT_US); + try repair_socket.setReadTimeout(SOCKET_TIMEOUT_MS); var repair_svc = try initRepair( logger, diff --git a/src/gossip/service.zig b/src/gossip/service.zig index 6b8186f41..b037f16e9 100644 --- a/src/gossip/service.zig +++ b/src/gossip/service.zig @@ -173,7 +173,7 @@ pub const GossipService = struct { const gossip_address = my_contact_info.getSocket(socket_tag.GOSSIP) orelse return error.GossipAddrUnspecified; var gossip_socket = UdpSocket.create(.ipv4, .udp) catch return error.SocketCreateFailed; gossip_socket.bindToPort(gossip_address.port()) catch return error.SocketBindFailed; - gossip_socket.setReadTimeout(socket_utils.SOCKET_TIMEOUT_US) catch return error.SocketSetTimeoutFailed; // 1 second + gossip_socket.setReadTimeout(socket_utils.SOCKET_TIMEOUT_MS) catch return error.SocketSetTimeoutFailed; // 1 second const failed_pull_hashes = HashTimeQueue.init(allocator); const push_msg_q = ArrayList(SignedGossipData).init(allocator); @@ -2933,27 +2933,27 @@ pub const BenchmarkGossipServiceGeneral = struct { pub const args = [_]BenchmarkArgs{ .{ - .name = "10k_ping_msgs", + .name = "5k_ping_msgs", .message_counts = .{ - .n_ping = 10_000, + .n_ping = 5_000, .n_push_message = 0, .n_pull_response = 0, }, }, .{ - .name = "10k_push_msgs", + .name = "5k_push_msgs", .message_counts = .{ .n_ping = 0, - .n_push_message = 10_000, + .n_push_message = 5_000, .n_pull_response = 0, }, }, .{ - .name = "10k_pull_resp_msgs", + .name = "1k_pull_resp_msgs", .message_counts = .{ .n_ping = 0, .n_push_message = 0, - .n_pull_response = 10_000, + .n_pull_response = 1_000, }, }, }; @@ -3050,10 +3050,10 @@ pub const BenchmarkGossipServiceGeneral = struct { var timer = try std.time.Timer.start(); while (true) { const v = gossip_service.stats.gossip_packets_processed.get(); + std.debug.print("{d} messages processed\r", .{v}); if (v >= msg_sent) { break; } - std.debug.print("{d} messages processed\r", .{v}); } const elapsed = timer.read(); std.debug.print("\r", .{}); diff --git a/src/net/socket_utils.zig b/src/net/socket_utils.zig index 22e94fde4..d577cdbb6 100644 --- a/src/net/socket_utils.zig +++ b/src/net/socket_utils.zig @@ -7,7 +7,7 @@ const Channel = @import("../sync/channel.zig").Channel; const std = @import("std"); const Logger = @import("../trace/log.zig").Logger; -pub const SOCKET_TIMEOUT_US: usize = 1 * std.time.us_per_s; +pub const SOCKET_TIMEOUT_MS: usize = 1 * std.time.ms_per_s; pub const PACKETS_PER_BATCH: usize = 64; pub fn readSocket( @@ -17,15 +17,10 @@ pub fn readSocket( exit: *const std.atomic.Value(bool), logger: Logger, ) !void { - // Performance out of the IO without poll - // * block on the socket until it's readable - // * set the socket to non blocking - // * read until it fails - // * set it back to blocking before returning + // NOTE: we set to non-blocking to periodically check if we should exit + try socket.setReadTimeout(SOCKET_TIMEOUT_MS); - try socket.setReadTimeout(SOCKET_TIMEOUT_US); - - while (!exit.load(.unordered)) { + inf_loop: while (!exit.load(.unordered)) { // init a new batch var packet_batch = try std.ArrayList(Packet).initCapacity( allocator, @@ -33,16 +28,16 @@ pub fn readSocket( ); errdefer packet_batch.deinit(); - // NOTE: usually this would be null (ie, blocking) - // but in order to exit cleanly in tests - we set to 1 second - try socket.setReadTimeout(std.time.ms_per_s); - // recv packets into batch while (packet_batch.items.len != packet_batch.capacity) { var packet: Packet = Packet.default(); const recv_meta = socket.receiveFrom(&packet.data) catch |err| switch (err) { error.WouldBlock => { if (packet_batch.items.len > 0) break; + if (exit.load(.unordered)) { + packet_batch.deinit(); + break :inf_loop; + } continue; }, else => |e| return e, @@ -57,6 +52,7 @@ pub fn readSocket( packet_batch.shrinkAndFree(packet_batch.items.len); try incoming_channel.send(packet_batch); } + logger.debugf("readSocket loop closed", .{}); } @@ -171,6 +167,7 @@ pub const BenchmarkPacketProcessing = struct { var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; var timer = try std.time.Timer.start(); + // NOTE: send more packets than we need because UDP drops some for (1..(n_packets * 2 + 1)) |i| { rand.fill(&packet_buf); _ = try socket.sendTo(to_endpoint, &packet_buf); @@ -185,6 +182,7 @@ pub const BenchmarkPacketProcessing = struct { } } } + std.debug.print("sent all packets.. waiting on receiver\r", .{}); recv_handle.join(); const elapsed = timer.read(); @@ -208,6 +206,7 @@ pub fn benchmarkChannelRecv( for (values) |packet_batch| { count += packet_batch.items.len; } + std.debug.print("recv packet count: {d}\r", .{count}); if (count >= n_values_to_receive) { break; }