From c2bf6cdd1e01c9c143e324442a3de6b61a7bda85 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 13 Sep 2023 15:43:09 -0500 Subject: [PATCH 01/72] fix prune logic --- src/benchmarks.zig | 7 +++-- src/gossip/gossip_service.zig | 49 ++++++++++++++++------------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/benchmarks.zig b/src/benchmarks.zig index 6c7e06d2a..329add027 100644 --- a/src/benchmarks.zig +++ b/src/benchmarks.zig @@ -29,7 +29,7 @@ pub fn main() !void { // TODO: very manual for now (bc we only have 2 benchmarks) // if we have more benchmarks we can make this more efficient - const max_time_per_bench = 500; // !! + const max_time_per_bench = 2 * std.time.ms_per_s; // !! if (std.mem.startsWith(u8, "socket_utils", filter)) { try benchmark( @@ -155,7 +155,10 @@ pub fn benchmark( try stderr.context.flush(); var timer = try time.Timer.start(); - inline for (functions) |def| { + inline for (functions, 0..) |def, fcni| { + if (fcni > 0) + std.debug.print("---\n", .{}); + inline for (args, 0..) |arg, index| { var runtimes: [max_iterations]u64 = undefined; var min: u64 = math.maxInt(u64); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index d335a9de7..001ae45e0 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -1071,33 +1071,30 @@ pub const GossipService = struct { var prune_packets = try std.ArrayList(Packet).initCapacity(self.allocator, n_packets); errdefer prune_packets.deinit(); - var origin_buf: [MAX_PRUNE_DATA_NODES]Pubkey = undefined; - var origin_count: usize = 0; - const now = get_wallclock_ms(); - var buf: [PACKET_DATA_SIZE]u8 = undefined; - const my_pubkey = Pubkey.fromPublicKey(&self.my_keypair.public_key, true); - - for (failed_origins.keys(), 0..) 
|origin, i| { - origin_buf[origin_count] = origin; - origin_count += 1; - - const is_last_iter = i == failed_origin_len - 1; - if (origin_count == MAX_PRUNE_DATA_NODES or is_last_iter) { - // create protocol message - var prune_data = PruneData.init(my_pubkey, origin_buf[0..origin_count], prune_destination, now); - prune_data.sign(&self.my_keypair) catch return error.SignatureError; - - // put it into a packet - var msg = Protocol{ .PruneMessage = .{ my_pubkey, prune_data } }; - // msg should never be bigger than the PacketSize and serialization shouldnt fail (unrecoverable) - var msg_slice = bincode.writeToSlice(&buf, msg, bincode.Params{}) catch unreachable; - var packet = Packet.init(from_gossip_endpoint, buf, msg_slice.len); - try prune_packets.append(packet); - - // reset array - origin_count = 0; - } + var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; + + var index: usize = 0; + while (true) { + const prune_size = @min(failed_origin_len - index, MAX_PRUNE_DATA_NODES); + if (prune_size == 0) break; + + var prune_data = PruneData.init( + self.my_pubkey, + failed_origins.keys()[index..(prune_size + index)], + prune_destination, + now, + ); + prune_data.sign(&self.my_keypair) catch return error.SignatureError; + + // put it into a packet + var msg = Protocol{ .PruneMessage = .{ self.my_pubkey, prune_data } }; + // msg should never be bigger than the PacketSize and serialization shouldnt fail (unrecoverable) + var msg_slice = bincode.writeToSlice(&packet_buf, msg, bincode.Params{}) catch unreachable; + var packet = Packet.init(from_gossip_endpoint, packet_buf, msg_slice.len); + try prune_packets.append(packet); + + index += prune_size; } return prune_packets; From 5be2f0f0793df3acd6b1faefa079e01be3839468 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Thu, 14 Sep 2023 17:32:43 -0500 Subject: [PATCH 02/72] fuzz changes --- build.zig | 5 +- src/gossip/fuzz.zig | 148 +++++++++++++++++++++------------- src/gossip/gossip_service.zig | 10 ++- 3 files 
changed, 103 insertions(+), 60 deletions(-) diff --git a/build.zig b/build.zig index 444240d3e..25c3865ba 100644 --- a/build.zig +++ b/build.zig @@ -139,7 +139,10 @@ pub fn build(b: *std.Build) void { fuzz_exe.addModule("getty", getty_mod); b.installArtifact(fuzz_exe); const fuzz_cmd = b.addRunArtifact(fuzz_exe); - b.step("fuzz_gossip", "fuzz gossip").dependOn(&fuzz_cmd.step); + if (b.args) |args| { + fuzz_cmd.addArgs(args); + } + b.step("fuzz", "fuzz gossip").dependOn(&fuzz_cmd.step); // benchmarking const benchmark_exe = b.addExecutable(.{ diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index f9a8a2fff..25d6328d3 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -3,6 +3,7 @@ //! to stop the fuzzer write any input to stdin and press enter const std = @import("std"); +const socket_utils = @import("./socket_utils.zig"); const _gossip_service = @import("./gossip_service.zig"); const GossipService = _gossip_service.GossipService; @@ -94,7 +95,10 @@ pub fn randomCrdsValue(rng: std.rand.Random, maybe_should_pass_sig_verification: var pubkey = Pubkey.fromPublicKey(&keypair.public_key, false); // will have random id - var value = try CrdsValue.random(rng, &keypair); + // var value = try CrdsValue.random(rng, &keypair); + var value = try CrdsValue.randomWithIndex(rng, &keypair, 0); + value.data.LegacyContactInfo = LegacyContactInfo.default(Pubkey.fromPublicKey(&keypair.public_key, false)); + try value.sign(&keypair); const should_pass_sig_verification = maybe_should_pass_sig_verification orelse rng.boolean(); if (should_pass_sig_verification) { @@ -152,9 +156,11 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key var bloom = try Bloom.random(allocator, 100, 0.1, N_FILTER_BITS); defer bloom.deinit(); - const crds_value = try CrdsValue.initSigned(crds.CrdsData{ + var crds_value = try CrdsValue.initSigned(crds.CrdsData{ .LegacyContactInfo = LegacyContactInfo.random(rng), }, keypair); + crds_value.data.LegacyContactInfo = 
LegacyContactInfo.default(Pubkey.fromPublicKey(&keypair.public_key, false)); + try crds_value.sign(keypair); var filter = CrdsFilter{ .filter = bloom, @@ -162,7 +168,8 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key .mask_bits = N_FILTER_BITS, }; - const invalid_filter = rng.boolean(); + // const invalid_filter = rng.boolean(); + const invalid_filter = false; if (invalid_filter) { filter.mask = (~@as(usize, 0)) >> rng.intRangeAtMost(u6, 1, 10); filter.mask_bits = rng.intRangeAtMost(u6, 1, 10); @@ -214,40 +221,50 @@ pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var allocator = gpa.allocator(); // use std.testing.allocator to detect leaks - var logger = Logger.init(gpa.allocator(), .debug); - defer logger.deinit(); - logger.spawn(); - - // setup the gossip service - var gossip_port: u16 = 9997; - var gossip_address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, gossip_port); - - var my_keypair = try KeyPair.create(null); - var exit = AtomicBool.init(false); + // parse cli args to define where to send packets + var cli_args = try std.process.argsWithAllocator(allocator); + defer cli_args.deinit(); + _ = cli_args.skip(); + // zig build fuzz -- + var maybe_entrypoint = cli_args.next(); + var maybe_seed = cli_args.next(); + var maybe_max_messages_string = cli_args.next(); + + const entrypoint = blk: { + if (maybe_entrypoint) |entrypoint| { + var addr = SocketAddr.parse(entrypoint) catch @panic("invalid entrypoint"); + break :blk addr; + } else { + @panic("usage: zig build fuzz -- "); + } + }; + var to_endpoint = entrypoint.toEndpoint(); + var entrypoints = std.ArrayList(SocketAddr).init(allocator); + defer entrypoints.deinit(); + try entrypoints.append(entrypoint); + + var seed = blk: { + if (maybe_seed) |seed_str| { + break :blk try std.fmt.parseInt(u64, seed_str, 10); + } else { + break :blk get_wallclock_ms(); + } + }; - // setup contact info - var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, false); 
- var contact_info = LegacyContactInfo.default(my_pubkey); - contact_info.shred_version = 0; - contact_info.gossip = gossip_address; + var maybe_max_messages = blk: { + if (maybe_max_messages_string) |max_messages_str| { + break :blk try std.fmt.parseInt(usize, max_messages_str, 10); + } else { + break :blk null; + } + }; - // start running gossip - var gossip_service = try GossipService.init( - allocator, - contact_info, - my_keypair, - null, - &exit, - logger, - ); - defer gossip_service.deinit(); + std.debug.print("using seed: {d}\n", .{seed}); + var rng = std.rand.DefaultPrng.init(seed); - var handle = try std.Thread.spawn( - .{}, - GossipService.run, - .{&gossip_service}, - ); - std.debug.print("gossip service started on port {d}\n", .{gossip_port}); + var logger = Logger.init(gpa.allocator(), .debug); + defer logger.deinit(); + logger.spawn(); // setup sending socket var fuzz_keypair = try KeyPair.create(null); @@ -263,42 +280,62 @@ pub fn main() !void { allocator, fuzz_contact_info, fuzz_keypair, - null, + entrypoints, &fuzz_exit, logger, ); - var fuzz_handle = try std.Thread.spawn( - .{}, - GossipService.run, - .{&gossip_service_fuzzer}, - ); - // blast it - var seed = get_wallclock_ms(); - // var seed: u64 = 1693494238796; - std.debug.print("SEED: {d}\n", .{seed}); - var rng = std.rand.DefaultPrng.init(seed); + var fuzz_handle = try std.Thread.spawn( + .{}, GossipService.run, .{ &gossip_service_fuzzer }); + + // std.debug.print("setting up", .{}); + // while (true) { + // var lg = gossip_service_fuzzer.crds_table_rw.read(); + // var table: *const CrdsTable = lg.get(); + // var n_contacts = table.contact_infos.iterator().len; + // lg.unlock(); + + // if (n_contacts > 0) { + // break; + // } + // std.debug.print(".", .{}); + // std.time.sleep(std.time.ns_per_ms); + // } + + const SLEEP_TIME = 0; + // const SLEEP_TIME = std.time.ns_per_ms * 10; + // const SLEEP_TIME = std.time.ns_per_s; // wait for keyboard input to exit var loop_exit = 
AtomicBool.init(false); var exit_handle = try std.Thread.spawn(.{}, waitForExit, .{&loop_exit}); + var msg_count: usize = 0; while (!loop_exit.load(std.atomic.Ordering.Unordered)) { - var command = rng.random().intRangeAtMost(u8, 0, 4); + if (maybe_max_messages) |max_messages| { + if (msg_count >= max_messages) { + break; + } + } + + // var command = rng.random().intRangeAtMost(u8, 0, 4); + // var command: usize = if (msg_count % 2 == 0) 2 else 4; + var command: usize = 3; + var packet = switch (command) { 0 => blk: { // send ping message - const packet = randomPingPacket(rng.random(), &fuzz_keypair, gossip_address.toEndpoint()); + const packet = randomPingPacket(rng.random(), &fuzz_keypair, to_endpoint); break :blk packet; }, 1 => blk: { // send pong message - const packet = randomPongPacket(rng.random(), &fuzz_keypair, gossip_address.toEndpoint()); + const packet = randomPongPacket(rng.random(), &fuzz_keypair, to_endpoint); break :blk packet; }, 2 => blk: { // send push message - const packets = randomPushMessage(rng.random(), &fuzz_keypair, gossip_address.toEndpoint()) catch |err| { + const packets = randomPushMessage(rng.random(), &fuzz_keypair, to_endpoint) catch |err| { std.debug.print("ERROR: {s}\n", .{@errorName(err)}); continue; }; @@ -309,7 +346,7 @@ pub fn main() !void { }, 3 => blk: { // send pull response - const packets = randomPullResponse(rng.random(), &fuzz_keypair, gossip_address.toEndpoint()) catch |err| { + const packets = randomPullResponse(rng.random(), &fuzz_keypair, to_endpoint) catch |err| { std.debug.print("ERROR: {s}\n", .{@errorName(err)}); continue; }; @@ -324,7 +361,7 @@ pub fn main() !void { allocator, rng.random(), &fuzz_keypair, - gossip_address.toEndpoint(), + to_endpoint, ); break :blk packet; }, @@ -339,19 +376,16 @@ pub fn main() !void { var send_duplicate = rng.random().boolean(); if (send_duplicate) { + msg_count +|= 1; try gossip_service_fuzzer.packet_outgoing_channel.send(send_packet); } - std.time.sleep(std.time.ns_per_ms * 
10); - // std.time.sleep(std.time.ns_per_s); + msg_count +|= 1; + std.time.sleep(SLEEP_TIME); } // cleanup std.debug.print("\t=> shutting down...\n", .{}); - exit.store(true, std.atomic.Ordering.Unordered); - handle.join(); - std.debug.print("\t=> gossip service shutdown\n", .{}); - fuzz_exit.store(true, std.atomic.Ordering.Unordered); fuzz_handle.join(); gossip_service_fuzzer.deinit(); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index a7492e097..3e31bbaff 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -302,6 +302,7 @@ pub const GossipService = struct { /// main logic for recieving and processing `Protocol` messages. pub fn processMessages(self: *Self) !void { + var timer = std.time.Timer.start() catch unreachable; var msg_count: usize = 0; while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); @@ -310,6 +311,9 @@ pub const GossipService = struct { std.time.sleep(std.time.ns_per_ms * 1); continue; } + if (msg_count == 0) { + timer.reset(); + } const protocol_messages = maybe_protocol_messages.?; defer self.verified_incoming_channel.allocator.free(protocol_messages); @@ -495,8 +499,10 @@ pub const GossipService = struct { }; } - msg_count += 1; + msg_count += protocol_messages.len; self.messages_processed.store(msg_count, std.atomic.Ordering.Unordered); + const elapsed = timer.read(); + std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); } } @@ -903,7 +909,7 @@ pub const GossipService = struct { return null; } - const MAX_NUM_CRDS_VALUES_PULL_RESPONSE = 100; // TODO: tune + const MAX_NUM_CRDS_VALUES_PULL_RESPONSE = 20; // TODO: this is approx the rust one -- should tune var crds_table_lock = self.crds_table_rw.read(); const crds_values = blk: { defer crds_table_lock.unlock(); From 017a3feab9aa8ecb6c72642f3ed10f503a8f9138 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 19 Sep 
2023 13:11:32 -0400 Subject: [PATCH 03/72] fixes --- src/gossip/fuzz.zig | 14 ++- src/gossip/gossip_service.zig | 188 +++++++++++++++++++++++++++------- 2 files changed, 157 insertions(+), 45 deletions(-) diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 25d6328d3..0290f95cc 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -157,10 +157,8 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key defer bloom.deinit(); var crds_value = try CrdsValue.initSigned(crds.CrdsData{ - .LegacyContactInfo = LegacyContactInfo.random(rng), + .LegacyContactInfo = LegacyContactInfo.default(Pubkey.fromPublicKey(&keypair.public_key, false)), }, keypair); - crds_value.data.LegacyContactInfo = LegacyContactInfo.default(Pubkey.fromPublicKey(&keypair.public_key, false)); - try crds_value.sign(keypair); var filter = CrdsFilter{ .filter = bloom, @@ -262,9 +260,9 @@ pub fn main() !void { std.debug.print("using seed: {d}\n", .{seed}); var rng = std.rand.DefaultPrng.init(seed); - var logger = Logger.init(gpa.allocator(), .debug); - defer logger.deinit(); - logger.spawn(); + // var logger = Logger.init(gpa.allocator(), .debug); + // defer logger.deinit(); + // logger.spawn(); // setup sending socket var fuzz_keypair = try KeyPair.create(null); @@ -282,11 +280,11 @@ pub fn main() !void { fuzz_keypair, entrypoints, &fuzz_exit, - logger, + .noop, ); var fuzz_handle = try std.Thread.spawn( - .{}, GossipService.run, .{ &gossip_service_fuzzer }); + .{}, GossipService.runSpy, .{ &gossip_service_fuzzer }); // std.debug.print("setting up", .{}); // while (true) { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 3e31bbaff..fb1c2078b 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -189,7 +189,6 @@ pub const GossipService = struct { } pub fn deinit(self: *Self) void { - // TODO: join and exit threads self.echo_server.deinit(); self.gossip_socket.close(); 
self.packet_incoming_channel.deinit(); @@ -250,6 +249,34 @@ pub const GossipService = struct { defer self.joinAndExit(&responder_handle); } + pub fn runSpy(self: *Self) !void { + var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); + defer self.joinAndExit(&ip_echo_server_listener_handle); + + var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ + &self.gossip_socket, + self.packet_incoming_channel, + self.exit, + self.logger, + }); + defer self.joinAndExit(&receiver_handle); + + var packet_verifier_handle = try Thread.spawn(.{}, Self.verifyPackets, .{self}); + defer self.joinAndExit(&packet_verifier_handle); + + var packet_handle = try Thread.spawn(.{}, Self.processMessages, .{self}); + defer self.joinAndExit(&packet_handle); + + // outputer thread + var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ + &self.gossip_socket, + self.packet_outgoing_channel, + self.exit, + self.logger, + }); + defer self.joinAndExit(&responder_handle); + } + /// main logic for deserializing Packets into Protocol messages /// and verifing they have valid values, and have valid signatures. /// Verified Protocol messages are then sent to the verified_channel. 
@@ -482,28 +509,33 @@ pub const GossipService = struct { const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); _ = ping_cache.receviedPong(pong, SocketAddr.fromEndpoint(from_endpoint), now); } + self.logger .field("from_endpoint", endpoint_buf.items) .field("from_pubkey", &pong.from.string()) .info("received pong message"); }, } + } - { - var crds_table_lock = self.crds_table_rw.write(); - defer crds_table_lock.unlock(); - - var crds_table: *CrdsTable = crds_table_lock.mut(); - crds_table.attemptTrim(CRDS_UNIQUE_PUBKEY_CAPACITY) catch |err| { - self.logger.warnf("error trimming crds table: {s}", .{@errorName(err)}); - }; - } - - msg_count += protocol_messages.len; - self.messages_processed.store(msg_count, std.atomic.Ordering.Unordered); - const elapsed = timer.read(); - std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + { + // var table_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = table_timer.read(); + // std.debug.print("crds table trim took {}ns\n", .{elapsed}); + // } + var crds_table_lock = self.crds_table_rw.write(); + defer crds_table_lock.unlock(); + + var crds_table: *CrdsTable = crds_table_lock.mut(); + crds_table.attemptTrim(CRDS_UNIQUE_PUBKEY_CAPACITY) catch |err| { + self.logger.warnf("error trimming crds table: {s}", .{@errorName(err)}); + }; } + + // const elapsed = timer.read(); + // std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + self.messages_processed.store(msg_count, std.atomic.Ordering.Unordered); } self.logger.debugf("process_messages loop closed\n", .{}); @@ -873,7 +905,7 @@ pub const GossipService = struct { { var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); - var crds_table = crds_table_lock.mut(); + var crds_table: *CrdsTable = crds_table_lock.mut(); crds_table.insert(pull_value, now) catch {}; crds_table.updateRecordTimestamp(pull_value.id(), now); @@ -2035,6 +2067,20 @@ pub 
const BenchmarkMessageProcessing = struct { "10_msg_iters", "100_msg_iters", }; + const Sender = struct { + const Self = @This(); + + gs: *GossipService, + to_endpoint: EndPoint, + + pub fn send(self: *Self, msg: Protocol) void { + self.gs.verified_incoming_channel.send(ProtocolMessage{ + .message = msg, + .from_endpoint = self.to_endpoint, + }) catch @panic("ahhhh"); + } + }; + pub fn benchmarkGossipService(num_message_iterations: usize) !void { const allocator = std.heap.page_allocator; var keypair = try KeyPair.create(null); @@ -2045,6 +2091,11 @@ pub const BenchmarkMessageProcessing = struct { contact_info.shred_version = 19; contact_info.gossip = address; + // var logger = Logger.init(allocator, .debug); + // defer logger.deinit(); + // logger.spawn(); + var logger: Logger = .noop; + var exit = AtomicBool.init(false); var gossip_service = try GossipService.init( allocator, @@ -2052,42 +2103,25 @@ pub const BenchmarkMessageProcessing = struct { keypair, null, &exit, - .noop, + logger, ); defer gossip_service.deinit(); - var logger = Logger.init(allocator, .debug); - defer logger.deinit(); - logger.spawn(); - var packet_handle = try Thread.spawn(.{}, GossipService.processMessages, .{ &gossip_service, }); var rand = std.rand.DefaultPrng.init(19); var rng = rand.random(); - - const Sender = struct { - const Self = @This(); - - gs: *GossipService, - to_endpoint: EndPoint, - - pub fn send(self: *Self, msg: Protocol) void { - self.gs.verified_incoming_channel.send(ProtocolMessage{ - .message = msg, - .from_endpoint = self.to_endpoint, - }) catch @panic("ahhhh"); - } - }; var sender = Sender{ .gs = &gossip_service, .to_endpoint = address.toEndpoint(), }; + var sender_keypair = try KeyPair.create(null); - // send a ping message var msg_sent: usize = 0; for (0..num_message_iterations) |_| { + // send a ping message { var msg = try fuzz.randomPing(rng, &keypair); sender.send(msg); @@ -2110,7 +2144,7 @@ pub const BenchmarkMessageProcessing = struct { msg_sent += 1; } } - 
// send a pull message + // send a pull response { var packets = try fuzz.randomPullResponse(rng, &keypair, address.toEndpoint()); defer packets.deinit(); @@ -2121,6 +2155,86 @@ pub const BenchmarkMessageProcessing = struct { msg_sent += 1; } } + // send a pull request + { + var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); + var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + sender.send(msg); + msg_sent += 1; + } + } + + while (true) { + const v = gossip_service.messages_processed.load(std.atomic.Ordering.Unordered); + if (v == msg_sent) { + break; + } + } + + exit.store(true, std.atomic.Ordering.Unordered); + packet_handle.join(); + } + + pub fn benchmarkPullRequests(num_message_iterations: usize) !void { + const allocator = std.heap.page_allocator; + var address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); + + var keypair = try KeyPair.create(null); + var pubkey = Pubkey.fromPublicKey(&keypair.public_key, false); + var contact_info = crds.LegacyContactInfo.default(pubkey); + contact_info.shred_version = 19; + contact_info.gossip = address; + + // var logger = Logger.init(allocator, .debug); + // defer logger.deinit(); + // logger.spawn(); + var logger: Logger = .noop; + + var exit = AtomicBool.init(false); + var gossip_service = try GossipService.init( + allocator, + contact_info, + keypair, + null, + &exit, + logger, + ); + defer gossip_service.deinit(); + + var packet_handle = try Thread.spawn(.{}, GossipService.processMessages, .{ + &gossip_service, + }); + + var rand = std.rand.DefaultPrng.init(19); + var rng = rand.random(); + + var sender_keypair = try KeyPair.create(null); + + var sender = Sender{ + .gs = &gossip_service, + .to_endpoint = address.toEndpoint(), + }; + + var msg_sent: usize = 0; + for (0..num_message_iterations) |_| { + // send a push message + { + var packets = try fuzz.randomPushMessage(rng, &sender_keypair, address.toEndpoint()); + defer 
packets.deinit(); + + for (packets.items) |packet| { + var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + sender.send(msg); + msg_sent += 1; + } + } + // send a pull request + { + var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); + var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + sender.send(msg); + msg_sent += 1; + } } while (true) { From 181c096a39f185d26835d59754ac246fb89b568d Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 19 Sep 2023 13:23:03 -0400 Subject: [PATCH 04/72] fix pull req gossip leak --- src/cmd/cmd.zig | 11 +++++---- src/gossip/fuzz.zig | 17 +++++++++++-- src/gossip/gossip_service.zig | 46 ++++++++++++++++++++++++++++++----- src/gossip/pull_response.zig | 39 ++++++++++++++++++++++++++--- src/gossip/socket_utils.zig | 4 +-- 5 files changed, 99 insertions(+), 18 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index f8301d683..64ec4ee2a 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -70,9 +70,10 @@ fn identity(_: []const []const u8) !void { // gossip entrypoint fn gossip(_: []const []const u8) !void { - var logger = Logger.init(gpa_allocator, .debug); - defer logger.deinit(); - logger.spawn(); + // var logger = Logger.init(gpa_allocator, .debug); + // defer logger.deinit(); + // logger.spawn(); + var logger: Logger = .noop; var my_keypair = try getOrInitIdentity(gpa_allocator, logger); @@ -83,7 +84,7 @@ fn gossip(_: []const []const u8) !void { // setup contact info var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, false); var contact_info = LegacyContactInfo.default(my_pubkey); - contact_info.shred_version = 0; // TODO: double check + contact_info.shred_version = 0; contact_info.gossip = gossip_address; var entrypoints = std.ArrayList(SocketAddr).init(gpa_allocator); @@ -112,7 +113,7 @@ fn gossip(_: []const []const u8) !void { var handle = try 
std.Thread.spawn( .{}, - GossipService.run, + GossipService.runSpy, .{&gossip_service}, ); diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 0290f95cc..709b79a50 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -183,6 +183,7 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key } else { // add some valid hashes var filter_set = try pull_request.CrdsFilterSet.initTest(allocator, filter.mask_bits); + for (0..5) |_| { var value = try randomCrdsValue(rng, true); var buf: [PACKET_DATA_SIZE]u8 = undefined; @@ -196,6 +197,9 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key filter.mask = filters.items[0].mask; filter.mask_bits = filters.items[0].mask_bits; + for (filters.items[1..]) |*filter_i| { + filter_i.filter.deinit(); + } filters.deinit(); } @@ -204,6 +208,11 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; var msg_slice = try bincode.writeToSlice(&packet_buf, msg, bincode.Params{}); var packet = Packet.init(to_addr, packet_buf, msg_slice.len); + + if (!invalid_filter) { + filter.filter.deinit(); + } + return packet; } @@ -316,9 +325,9 @@ pub fn main() !void { } } - // var command = rng.random().intRangeAtMost(u8, 0, 4); + var command = rng.random().intRangeAtMost(u8, 0, 4); // var command: usize = if (msg_count % 2 == 0) 2 else 4; - var command: usize = 3; + // var command: usize = 4; var packet = switch (command) { 0 => blk: { @@ -380,6 +389,10 @@ pub fn main() !void { msg_count +|= 1; std.time.sleep(SLEEP_TIME); + + if (msg_count % 1000 == 0) { + std.debug.print("{d} messages sent\n", .{msg_count}); + } } // cleanup diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index fb1c2078b..5fddc7f4c 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const network = 
@import("zig-network"); const EndPoint = network.EndPoint; const Packet = @import("packet.zig").Packet; @@ -67,7 +68,7 @@ pub const MAX_BYTES_PER_PUSH: u64 = PACKET_DATA_SIZE * @as(u64, MAX_PACKETS_PER_ // 4 (enum) + 32 (pubkey) + 8 (len) = 44 pub const MAX_PUSH_MESSAGE_PAYLOAD_SIZE: usize = PACKET_DATA_SIZE - 44; -pub const GOSSIP_SLEEP_MILLIS: u64 = 1 * std.time.ms_per_s; +pub const GOSSIP_SLEEP_MILLIS: u64 = 100; pub const GOSSIP_PING_CACHE_CAPACITY: usize = 65536; pub const GOSSIP_PING_CACHE_TTL_NS: u64 = std.time.ns_per_s * 1280; pub const GOSSIP_PING_CACHE_RATE_LIMIT_DELAY_NS: u64 = std.time.ns_per_s * (1280 / 64); @@ -286,8 +287,8 @@ pub const GossipService = struct { while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_packets = try self.packet_incoming_channel.try_drain(); if (maybe_packets == null) { - // sleep for 1ms - std.time.sleep(std.time.ns_per_ms * 1); + // // sleep for 1ms + // std.time.sleep(std.time.ns_per_ms * 1); continue; } @@ -334,8 +335,8 @@ pub const GossipService = struct { while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { - // sleep for 1ms - std.time.sleep(std.time.ns_per_ms * 1); + // // sleep for 1ms + // std.time.sleep(std.time.ns_per_ms * 1); continue; } if (msg_count == 0) { @@ -344,6 +345,7 @@ pub const GossipService = struct { const protocol_messages = maybe_protocol_messages.?; defer self.verified_incoming_channel.allocator.free(protocol_messages); + msg_count += protocol_messages.len; for (protocol_messages) |protocol_message| { var message: Protocol = protocol_message.message; @@ -351,6 +353,12 @@ pub const GossipService = struct { switch (message) { .PushMessage => |*push| { + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // std.debug.print("push_message took {}ns\n", .{elapsed}); + // } + const push_from: Pubkey = 
push[0]; const push_values: []CrdsValue = push[1]; @@ -385,6 +393,12 @@ pub const GossipService = struct { push_log_entry.info("received push message"); }, .PullResponse => |*pull| { + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // std.debug.print("pull_response took {}ns\n", .{elapsed}); + // } + const from: Pubkey = pull[0]; const crds_values: []CrdsValue = pull[1]; @@ -404,6 +418,12 @@ pub const GossipService = struct { // pull_log_entry.info("received pull response"); }, .PullRequest => |*pull| { + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // std.debug.print("pull_request took {}ns\n", .{elapsed}); + // } + var pull_filter: CrdsFilter = pull[0]; var pull_value: CrdsValue = pull[1]; // contact info @@ -436,6 +456,7 @@ pub const GossipService = struct { .err("error handling pull request"); continue; }; + if (maybe_packets == null) { pull_log_entry.field("num_packets_resp", 0) .info("received pull request"); @@ -903,6 +924,12 @@ pub const GossipService = struct { const now = get_wallclock_ms(); { + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("pull_request crds_table_insert took {}ns\n", .{elapsed}); + } + var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); var crds_table: *CrdsTable = crds_table_lock.mut(); @@ -942,9 +969,16 @@ pub const GossipService = struct { } const MAX_NUM_CRDS_VALUES_PULL_RESPONSE = 20; // TODO: this is approx the rust one -- should tune - var crds_table_lock = self.crds_table_rw.read(); const crds_values = blk: { + var crds_table_lock = self.crds_table_rw.read(); defer crds_table_lock.unlock(); + + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("pull_request filterCrdsValues took {}ns\n", .{elapsed}); + } + break :blk try 
pull_response.filterCrdsValues( self.allocator, crds_table_lock.get(), diff --git a/src/gossip/pull_response.zig b/src/gossip/pull_response.zig index 8745ae2f3..38038c28d 100644 --- a/src/gossip/pull_response.zig +++ b/src/gossip/pull_response.zig @@ -36,14 +36,14 @@ pub fn filterCrdsValues( const jitter = rng.intRangeAtMost(u64, 0, CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS / 4); const caller_wallclock_with_jitter = caller_wallclock + jitter; - var output = ArrayList(CrdsValue).init(alloc); - errdefer output.deinit(); - var bloom = filter.filter; var match_indexs = try crds_table.getBitmaskMatches(alloc, filter.mask, filter.mask_bits); defer match_indexs.deinit(); + var output = try ArrayList(CrdsValue).initCapacity(alloc, match_indexs.items.len); + errdefer output.deinit(); + for (match_indexs.items) |entry_index| { var entry = crds_table.store.iterator().values[entry_index]; @@ -70,6 +70,39 @@ pub fn filterCrdsValues( return output; } +test "gossip.pull: test filter_crds_values batch" { + var crds_table = try CrdsTable.init(std.testing.allocator); + var crds_table_rw = RwMux(CrdsTable).init(crds_table); + defer { + var lg = crds_table_rw.write(); + lg.mut().deinit(); + } + var seed: u64 = 18; + var rand = std.rand.DefaultPrng.init(seed); + const rng = rand.random(); + + // insert a some values + const keypair = try KeyPair.create([_]u8{1} ** 32); + var lg = crds_table_rw.write(); + for (0..100) |_| { + var crds_value = try crds.CrdsValue.random(rng, &keypair); + try lg.mut().insert(crds_value, 0); + } + lg.unlock(); + + const fuzz = @import("fuzz.zig"); + const SocketAddr = @import("../net/net.zig").SocketAddr; + + // create a pull request + const allocator = std.testing.allocator; + const to_addr = SocketAddr.random(rng).toEndpoint(); + + const packet = try fuzz.randomPullRequest(allocator, rng, &keypair, to_addr); + _ = packet; + // var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + +} + test "gossip.pull: test 
filter_crds_values" { var crds_table = try CrdsTable.init(std.testing.allocator); var crds_table_rw = RwMux(CrdsTable).init(crds_table); diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index bce916877..16b91a147 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -17,7 +17,7 @@ pub fn readSocket( while (!exit.load(std.atomic.Ordering.Unordered)) { const recv_meta = socket.receiveFrom(&read_buf) catch |err| { if (err == error.WouldBlock) { - std.time.sleep(std.time.ns_per_ms * 1); + // std.time.sleep(std.time.ns_per_ms * 1); continue; } else { logger.debugf("read_socket error: {s}\n", .{@errorName(err)}); @@ -51,7 +51,7 @@ pub fn sendSocket( const maybe_packets = try outgoing_channel.try_drain(); if (maybe_packets == null) { // sleep for 1ms - std.time.sleep(std.time.ns_per_ms * 1); + // std.time.sleep(std.time.ns_per_ms * 1); continue; } const packets = maybe_packets.?; From 53fe8ac9d174ea08a15a8709c73d80910d6eb7e0 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 19 Sep 2023 13:23:03 -0400 Subject: [PATCH 05/72] fix pull req gossip leak --- src/cmd/cmd.zig | 11 +++++---- src/gossip/fuzz.zig | 17 +++++++++++-- src/gossip/gossip_service.zig | 46 ++++++++++++++++++++++++++++++----- src/gossip/pull_response.zig | 39 ++++++++++++++++++++++++++--- src/gossip/socket_utils.zig | 4 +-- 5 files changed, 99 insertions(+), 18 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index f8301d683..64ec4ee2a 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -70,9 +70,10 @@ fn identity(_: []const []const u8) !void { // gossip entrypoint fn gossip(_: []const []const u8) !void { - var logger = Logger.init(gpa_allocator, .debug); - defer logger.deinit(); - logger.spawn(); + // var logger = Logger.init(gpa_allocator, .debug); + // defer logger.deinit(); + // logger.spawn(); + var logger: Logger = .noop; var my_keypair = try getOrInitIdentity(gpa_allocator, logger); @@ -83,7 +84,7 @@ fn gossip(_: []const 
[]const u8) !void { // setup contact info var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, false); var contact_info = LegacyContactInfo.default(my_pubkey); - contact_info.shred_version = 0; // TODO: double check + contact_info.shred_version = 0; contact_info.gossip = gossip_address; var entrypoints = std.ArrayList(SocketAddr).init(gpa_allocator); @@ -112,7 +113,7 @@ fn gossip(_: []const []const u8) !void { var handle = try std.Thread.spawn( .{}, - GossipService.run, + GossipService.runSpy, .{&gossip_service}, ); diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 0290f95cc..709b79a50 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -183,6 +183,7 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key } else { // add some valid hashes var filter_set = try pull_request.CrdsFilterSet.initTest(allocator, filter.mask_bits); + for (0..5) |_| { var value = try randomCrdsValue(rng, true); var buf: [PACKET_DATA_SIZE]u8 = undefined; @@ -196,6 +197,9 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key filter.mask = filters.items[0].mask; filter.mask_bits = filters.items[0].mask_bits; + for (filters.items[1..]) |*filter_i| { + filter_i.filter.deinit(); + } filters.deinit(); } @@ -204,6 +208,11 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; var msg_slice = try bincode.writeToSlice(&packet_buf, msg, bincode.Params{}); var packet = Packet.init(to_addr, packet_buf, msg_slice.len); + + if (!invalid_filter) { + filter.filter.deinit(); + } + return packet; } @@ -316,9 +325,9 @@ pub fn main() !void { } } - // var command = rng.random().intRangeAtMost(u8, 0, 4); + var command = rng.random().intRangeAtMost(u8, 0, 4); // var command: usize = if (msg_count % 2 == 0) 2 else 4; - var command: usize = 3; + // var command: usize = 4; var packet = switch (command) { 0 => blk: { @@ -380,6 +389,10 @@ pub fn main() 
!void { msg_count +|= 1; std.time.sleep(SLEEP_TIME); + + if (msg_count % 1000 == 0) { + std.debug.print("{d} messages sent\n", .{msg_count}); + } } // cleanup diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index fb1c2078b..5fddc7f4c 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const network = @import("zig-network"); const EndPoint = network.EndPoint; const Packet = @import("packet.zig").Packet; @@ -67,7 +68,7 @@ pub const MAX_BYTES_PER_PUSH: u64 = PACKET_DATA_SIZE * @as(u64, MAX_PACKETS_PER_ // 4 (enum) + 32 (pubkey) + 8 (len) = 44 pub const MAX_PUSH_MESSAGE_PAYLOAD_SIZE: usize = PACKET_DATA_SIZE - 44; -pub const GOSSIP_SLEEP_MILLIS: u64 = 1 * std.time.ms_per_s; +pub const GOSSIP_SLEEP_MILLIS: u64 = 100; pub const GOSSIP_PING_CACHE_CAPACITY: usize = 65536; pub const GOSSIP_PING_CACHE_TTL_NS: u64 = std.time.ns_per_s * 1280; pub const GOSSIP_PING_CACHE_RATE_LIMIT_DELAY_NS: u64 = std.time.ns_per_s * (1280 / 64); @@ -286,8 +287,8 @@ pub const GossipService = struct { while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_packets = try self.packet_incoming_channel.try_drain(); if (maybe_packets == null) { - // sleep for 1ms - std.time.sleep(std.time.ns_per_ms * 1); + // // sleep for 1ms + // std.time.sleep(std.time.ns_per_ms * 1); continue; } @@ -334,8 +335,8 @@ pub const GossipService = struct { while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { - // sleep for 1ms - std.time.sleep(std.time.ns_per_ms * 1); + // // sleep for 1ms + // std.time.sleep(std.time.ns_per_ms * 1); continue; } if (msg_count == 0) { @@ -344,6 +345,7 @@ pub const GossipService = struct { const protocol_messages = maybe_protocol_messages.?; defer self.verified_incoming_channel.allocator.free(protocol_messages); + msg_count 
+= protocol_messages.len; for (protocol_messages) |protocol_message| { var message: Protocol = protocol_message.message; @@ -351,6 +353,12 @@ pub const GossipService = struct { switch (message) { .PushMessage => |*push| { + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // std.debug.print("push_message took {}ns\n", .{elapsed}); + // } + const push_from: Pubkey = push[0]; const push_values: []CrdsValue = push[1]; @@ -385,6 +393,12 @@ pub const GossipService = struct { push_log_entry.info("received push message"); }, .PullResponse => |*pull| { + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // std.debug.print("pull_response took {}ns\n", .{elapsed}); + // } + const from: Pubkey = pull[0]; const crds_values: []CrdsValue = pull[1]; @@ -404,6 +418,12 @@ pub const GossipService = struct { // pull_log_entry.info("received pull response"); }, .PullRequest => |*pull| { + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // std.debug.print("pull_request took {}ns\n", .{elapsed}); + // } + var pull_filter: CrdsFilter = pull[0]; var pull_value: CrdsValue = pull[1]; // contact info @@ -436,6 +456,7 @@ pub const GossipService = struct { .err("error handling pull request"); continue; }; + if (maybe_packets == null) { pull_log_entry.field("num_packets_resp", 0) .info("received pull request"); @@ -903,6 +924,12 @@ pub const GossipService = struct { const now = get_wallclock_ms(); { + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("pull_request crds_table_insert took {}ns\n", .{elapsed}); + } + var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); var crds_table: *CrdsTable = crds_table_lock.mut(); @@ -942,9 +969,16 @@ pub const GossipService = struct { } const MAX_NUM_CRDS_VALUES_PULL_RESPONSE = 
20; // TODO: this is approx the rust one -- should tune - var crds_table_lock = self.crds_table_rw.read(); const crds_values = blk: { + var crds_table_lock = self.crds_table_rw.read(); defer crds_table_lock.unlock(); + + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("pull_request filterCrdsValues took {}ns\n", .{elapsed}); + } + break :blk try pull_response.filterCrdsValues( self.allocator, crds_table_lock.get(), diff --git a/src/gossip/pull_response.zig b/src/gossip/pull_response.zig index 8745ae2f3..38038c28d 100644 --- a/src/gossip/pull_response.zig +++ b/src/gossip/pull_response.zig @@ -36,14 +36,14 @@ pub fn filterCrdsValues( const jitter = rng.intRangeAtMost(u64, 0, CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS / 4); const caller_wallclock_with_jitter = caller_wallclock + jitter; - var output = ArrayList(CrdsValue).init(alloc); - errdefer output.deinit(); - var bloom = filter.filter; var match_indexs = try crds_table.getBitmaskMatches(alloc, filter.mask, filter.mask_bits); defer match_indexs.deinit(); + var output = try ArrayList(CrdsValue).initCapacity(alloc, match_indexs.items.len); + errdefer output.deinit(); + for (match_indexs.items) |entry_index| { var entry = crds_table.store.iterator().values[entry_index]; @@ -70,6 +70,39 @@ pub fn filterCrdsValues( return output; } +test "gossip.pull: test filter_crds_values batch" { + var crds_table = try CrdsTable.init(std.testing.allocator); + var crds_table_rw = RwMux(CrdsTable).init(crds_table); + defer { + var lg = crds_table_rw.write(); + lg.mut().deinit(); + } + var seed: u64 = 18; + var rand = std.rand.DefaultPrng.init(seed); + const rng = rand.random(); + + // insert a some values + const keypair = try KeyPair.create([_]u8{1} ** 32); + var lg = crds_table_rw.write(); + for (0..100) |_| { + var crds_value = try crds.CrdsValue.random(rng, &keypair); + try lg.mut().insert(crds_value, 0); + } + lg.unlock(); + + const fuzz = @import("fuzz.zig"); + const 
SocketAddr = @import("../net/net.zig").SocketAddr; + + // create a pull request + const allocator = std.testing.allocator; + const to_addr = SocketAddr.random(rng).toEndpoint(); + + const packet = try fuzz.randomPullRequest(allocator, rng, &keypair, to_addr); + _ = packet; + // var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + +} + test "gossip.pull: test filter_crds_values" { var crds_table = try CrdsTable.init(std.testing.allocator); var crds_table_rw = RwMux(CrdsTable).init(crds_table); diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index bce916877..16b91a147 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -17,7 +17,7 @@ pub fn readSocket( while (!exit.load(std.atomic.Ordering.Unordered)) { const recv_meta = socket.receiveFrom(&read_buf) catch |err| { if (err == error.WouldBlock) { - std.time.sleep(std.time.ns_per_ms * 1); + // std.time.sleep(std.time.ns_per_ms * 1); continue; } else { logger.debugf("read_socket error: {s}\n", .{@errorName(err)}); @@ -51,7 +51,7 @@ pub fn sendSocket( const maybe_packets = try outgoing_channel.try_drain(); if (maybe_packets == null) { // sleep for 1ms - std.time.sleep(std.time.ns_per_ms * 1); + // std.time.sleep(std.time.ns_per_ms * 1); continue; } const packets = maybe_packets.?; From 3ebe1649177acf76053ce47d3a178c341ee7c2b2 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 19 Sep 2023 15:16:57 -0400 Subject: [PATCH 06/72] adding parallel processing of pull requests code --- src/gossip/pull_response.zig | 153 +++- src/sync/thread_pool.zig | 1316 ++++++++++++++++++++++++++++++++++ 2 files changed, 1464 insertions(+), 5 deletions(-) create mode 100644 src/sync/thread_pool.zig diff --git a/src/gossip/pull_response.zig b/src/gossip/pull_response.zig index 38038c28d..6e35aef76 100644 --- a/src/gossip/pull_response.zig +++ b/src/gossip/pull_response.zig @@ -71,6 +71,9 @@ pub fn filterCrdsValues( } test 
"gossip.pull: test filter_crds_values batch" { + const N_FILTERS = 100; + const N_VALUES_IN_TABLE = 10_000; + var crds_table = try CrdsTable.init(std.testing.allocator); var crds_table_rw = RwMux(CrdsTable).init(crds_table); defer { @@ -84,7 +87,7 @@ test "gossip.pull: test filter_crds_values batch" { // insert a some values const keypair = try KeyPair.create([_]u8{1} ** 32); var lg = crds_table_rw.write(); - for (0..100) |_| { + for (0..N_VALUES_IN_TABLE) |_| { var crds_value = try crds.CrdsValue.random(rng, &keypair); try lg.mut().insert(crds_value, 0); } @@ -92,15 +95,155 @@ test "gossip.pull: test filter_crds_values batch" { const fuzz = @import("fuzz.zig"); const SocketAddr = @import("../net/net.zig").SocketAddr; + const bincode = @import("../bincode/bincode.zig"); + const Protocol = @import("protocol.zig").Protocol; // create a pull request - const allocator = std.testing.allocator; + // const allocator = std.testing.allocator; + const allocator = std.heap.c_allocator; const to_addr = SocketAddr.random(rng).toEndpoint(); - const packet = try fuzz.randomPullRequest(allocator, rng, &keypair, to_addr); - _ = packet; - // var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + var filters = try std.ArrayList(CrdsFilter).initCapacity(allocator, N_FILTERS); + defer { + for (filters.items) |*filter| { + filter.deinit(); + } + filters.deinit(); + } + for (0..N_FILTERS) |_| { + const packet = try fuzz.randomPullRequest(allocator, rng, &keypair, to_addr); + var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); + var filter: CrdsFilter = msg.PullRequest[0]; + filters.appendAssumeCapacity(filter); + } + + // process them sequentially + var resp_values = std.ArrayList(CrdsValue).init(allocator); + defer resp_values.deinit(); + var read_lg = crds_table_rw.read(); + var crds_table_read: *const CrdsTable = read_lg.get(); + + var seq_timer = try std.time.Timer.start(); + for 
(filters.items) |*filter| { + const resp = try filterCrdsValues( + allocator, + crds_table_read, + filter, + crds.getWallclockMs(), + 100 + ); + defer resp.deinit(); + + try resp_values.appendSlice(resp.items); + } + read_lg.unlock(); + std.debug.assert(resp_values.items.len > 0); + const seq_elapsed = seq_timer.read(); + std.debug.print("SEQ: elapsed = {}\n", .{seq_elapsed}); + + // process them in parallel + const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; + const Task = ThreadPool.Task; + + var pool = ThreadPool.init(.{ + .max_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), + .stack_size = 2 * 1024 * 1024, + }); + + const PullRequestContext = struct { + filter: *const CrdsFilter, + crds_table: *const CrdsTable, + output: ArrayList(CrdsValue), + done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + }; + + const PullRequestTask = struct { + task: Task, + context: *PullRequestContext, + allocator: std.mem.Allocator, + + pub fn callback(task: *Task) void { + var self = @fieldParentPtr(@This(), "task", task); + const response_crds_values = filterCrdsValues( + self.allocator, + self.context.crds_table, + self.context.filter, + crds.getWallclockMs(), + 100, + ) catch { + // std.debug.print("filterCrdsValues failed\n", .{}); + return; + }; + self.context.output.appendSlice(response_crds_values.items) catch { + // std.debug.print("append slice failed\n", .{}); + return; + }; + // std.debug.print("success: len = {}\n", .{ response_crds_values.items.len }); + self.context.done.store(true, std.atomic.Ordering.Release); + } + }; + + // read lock crds table + read_lg = crds_table_rw.read(); + crds_table_read = read_lg.get(); + var batch: ThreadPool.Batch = undefined; + var parallel_timer = try std.time.Timer.start(); + + var tasks = try std.ArrayList(*PullRequestTask).initCapacity(allocator, filters.items.len); + for (filters.items, 0..) 
|*filter_i, i| { + var output = ArrayList(CrdsValue).init(allocator); + var context = PullRequestContext { + .filter = filter_i, + .crds_table = crds_table_read, + .output = output, + }; + var context_heap = try allocator.create(PullRequestContext); + context_heap.* = context; + + var pull_task = PullRequestTask { + .task = .{ .callback = PullRequestTask.callback }, + .context = context_heap, + .allocator = allocator, + }; + + // alloc on heap + var pull_task_heap = try allocator.create(PullRequestTask); + pull_task_heap.* = pull_task; + tasks.appendAssumeCapacity(pull_task_heap); + + if (i == 0) { + batch = ThreadPool.Batch.from(&pull_task_heap.task); + } else { + var tmp_batch = ThreadPool.Batch.from(&pull_task_heap.task); + batch.push(tmp_batch); + } + } + // schedule the threadpool + ThreadPool.schedule(&pool, batch); + + for (tasks.items) |task| { + while (!task.context.done.load(std.atomic.Ordering.Acquire)) { + // wait + } + } + // unlock crds table + read_lg.unlock(); + const parallel_elapsed = parallel_timer.read(); + std.debug.print("PARALLEL: elapsed: {}\n", .{parallel_elapsed}); + + var total_len: usize = 0; + for (tasks.items) |task| { + total_len += task.context.output.items.len; + } + try std.testing.expect(total_len == resp_values.items.len); + const time_diff: i128 = @as(i128, @intCast(parallel_elapsed)) - @as(i128, @intCast(seq_elapsed)); + std.debug.print("TIME DIFF: {}(ns)\n", .{time_diff}); + if (time_diff > 0) { + std.debug.print("sequential fast\n", .{}); + } else { + std.debug.print("parallel fast\n", .{}); + } } test "gossip.pull: test filter_crds_values" { diff --git a/src/sync/thread_pool.zig b/src/sync/thread_pool.zig new file mode 100644 index 000000000..51fe7e8be --- /dev/null +++ b/src/sync/thread_pool.zig @@ -0,0 +1,1316 @@ +// Thank you bun.sh: +// https://github.com/oven-sh/bun/blob/main/src/thread_pool.zig +// +// Thank you @kprotty: +// https://github.com/kprotty/zap/blob/blog/src/thread_pool.zig + +const std = @import("std"); 
+const builtin = @import("builtin"); +const Futex = std.Thread.Futex; +const assert = std.debug.assert; +const Atomic = std.atomic.Atomic; +pub const OnSpawnCallback = *const fn (ctx: ?*anyopaque) ?*anyopaque; + +pub const ThreadPool = struct { + sleep_on_idle_network_thread: bool = true, + /// executed on the thread + on_thread_spawn: ?OnSpawnCallback = null, + threadpool_context: ?*anyopaque = null, + stack_size: u32, + max_threads: u32, + sync: Atomic(u32) = Atomic(u32).init(@as(u32, @bitCast(Sync{}))), + idle_event: Event = .{}, + join_event: Event = .{}, + run_queue: Node.Queue = .{}, + threads: Atomic(?*Thread) = Atomic(?*Thread).init(null), + name: []const u8 = "", + spawned_thread_count: Atomic(u32) = Atomic(u32).init(0), + + const Sync = packed struct { + /// Tracks the number of threads not searching for Tasks + idle: u14 = 0, + /// Tracks the number of threads spawned + spawned: u14 = 0, + /// What you see is what you get + unused: bool = false, + /// Used to not miss notifications while state = waking + notified: bool = false, + /// The current state of the thread pool + state: enum(u2) { + /// A notification can be issued to wake up a sleeping as the "waking thread". + pending = 0, + /// The state was notified with a signal. A thread is woken up. + /// The first thread to transition to `waking` becomes the "waking thread". + signaled, + /// There is a "waking thread" among us. + /// No other thread should be woken up until the waking thread transitions the state. + waking, + /// The thread pool was terminated. Start decremented `spawned` so that it can be joined. + shutdown, + } = .pending, + }; + + /// Configuration options for the thread pool. + /// TODO: add CPU core affinity? + pub const Config = struct { + stack_size: u32 = (std.Thread.SpawnConfig{}).stack_size, + max_threads: u32, + }; + + /// Statically initialize the thread pool using the configuration. 
+ pub fn init(config: Config) ThreadPool { + return .{ + .stack_size = @max(1, config.stack_size), + .max_threads = @max(1, config.max_threads), + }; + } + + pub fn wakeForIdleEvents(this: *ThreadPool) void { + // Wake all the threads to check for idle events. + this.idle_event.wake(Event.NOTIFIED, std.math.maxInt(u32)); + } + + /// Wait for a thread to call shutdown() on the thread pool and kill the worker threads. + pub fn deinit(self: *ThreadPool) void { + self.join(); + self.* = undefined; + } + + /// A Task represents the unit of Work / Job / Execution that the ThreadPool schedules. + /// The user provides a `callback` which is invoked when the *Task can run on a thread. + pub const Task = struct { + node: Node = .{}, + callback: *const (fn (*Task) void), + }; + + /// An unordered collection of Tasks which can be submitted for scheduling as a group. + pub const Batch = struct { + len: usize = 0, + head: ?*Task = null, + tail: ?*Task = null, + + pub fn pop(this: *Batch) ?*Task { + const len = @atomicLoad(usize, &this.len, .Monotonic); + if (len == 0) { + return null; + } + var task = this.head.?; + if (task.node.next) |node| { + this.head = @fieldParentPtr(Task, "node", node); + } else { + if (task != this.tail.?) unreachable; + this.tail = null; + this.head = null; + } + + this.len -= 1; + if (len == 0) { + this.tail = null; + } + return task; + } + + /// Create a batch from a single task. + pub fn from(task: *Task) Batch { + return Batch{ + .len = 1, + .head = task, + .tail = task, + }; + } + + /// Another batch into this one, taking ownership of its tasks. 
+ pub fn push(self: *Batch, batch: Batch) void { + if (batch.len == 0) return; + if (self.len == 0) { + self.* = batch; + } else { + self.tail.?.node.next = if (batch.head) |h| &h.node else null; + self.tail = batch.tail; + self.len += batch.len; + } + } + }; + + pub const WaitGroup = struct { + mutex: std.Thread.Mutex = .{}, + counter: u32 = 0, + event: std.Thread.ResetEvent, + + pub fn init(self: *WaitGroup) void { + self.* = .{ + .mutex = .{}, + .counter = 0, + .event = undefined, + }; + } + + pub fn deinit(self: *WaitGroup) void { + self.event.reset(); + self.* = undefined; + } + + pub fn start(self: *WaitGroup) void { + self.mutex.lock(); + defer self.mutex.unlock(); + + self.counter += 1; + } + + pub fn isDone(this: *WaitGroup) bool { + return @atomicLoad(u32, &this.counter, .Monotonic) == 0; + } + + pub fn finish(self: *WaitGroup) void { + self.mutex.lock(); + defer self.mutex.unlock(); + + self.counter -= 1; + + if (self.counter == 0) { + self.event.set(); + } + } + + pub fn wait(self: *WaitGroup) void { + while (true) { + self.mutex.lock(); + + if (self.counter == 0) { + self.mutex.unlock(); + return; + } + + self.mutex.unlock(); + self.event.wait(); + } + } + + pub fn reset(self: *WaitGroup) void { + self.event.reset(); + } + }; + + pub fn ConcurrentFunction( + comptime Function: anytype, + ) type { + return struct { + const Fn = Function; + const Args = std.meta.ArgsTuple(@TypeOf(Fn)); + const Runner = @This(); + thread_pool: *ThreadPool, + states: []Routine = undefined, + batch: Batch = .{}, + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator, thread_pool: *ThreadPool, count: usize) !Runner { + return Runner{ + .allocator = allocator, + .thread_pool = thread_pool, + .states = try allocator.alloc(Routine, count), + .batch = .{}, + }; + } + + pub fn call(this: *@This(), args: Args) void { + this.states[this.batch.len] = .{ + .args = args, + }; + this.batch.push(Batch.from(&this.states[this.batch.len].task)); + } + + pub fn 
run(this: *@This()) void { + this.thread_pool.schedule(this.batch); + } + + pub const Routine = struct { + args: Args, + task: Task = .{ .callback = callback }, + + pub fn callback(task: *Task) void { + var routine = @fieldParentPtr(@This(), "task", task); + @call(.always_inline, Fn, routine.args); + } + }; + + pub fn deinit(this: *@This()) void { + this.allocator.free(this.states); + } + }; + } + + pub fn runner( + this: *ThreadPool, + allocator: std.mem.Allocator, + comptime Function: anytype, + count: usize, + ) !ConcurrentFunction(Function) { + return try ConcurrentFunction(Function).init(allocator, this, count); + } + + /// Loop over an array of tasks and invoke `Run` on each one in a different thread + /// **Blocks the calling thread** until all tasks are completed. + pub fn do( + this: *ThreadPool, + allocator: std.mem.Allocator, + wg: ?*WaitGroup, + ctx: anytype, + comptime Run: anytype, + values: anytype, + ) !void { + return try Do(this, allocator, wg, @TypeOf(ctx), ctx, Run, @TypeOf(values), values, false); + } + + pub fn doPtr( + this: *ThreadPool, + allocator: std.mem.Allocator, + wg: ?*WaitGroup, + ctx: anytype, + comptime Run: anytype, + values: anytype, + ) !void { + return try Do(this, allocator, wg, @TypeOf(ctx), ctx, Run, @TypeOf(values), values, true); + } + + pub fn Do( + this: *ThreadPool, + allocator: std.mem.Allocator, + wg: ?*WaitGroup, + comptime Context: type, + ctx: Context, + comptime Function: anytype, + comptime ValuesType: type, + values: ValuesType, + comptime as_ptr: bool, + ) !void { + if (values.len == 0) + return; + var allocated_wait_group: ?*WaitGroup = null; + defer { + if (allocated_wait_group) |group| { + group.deinit(); + allocator.destroy(group); + } + } + + var wait_group = wg orelse brk: { + allocated_wait_group = try allocator.create(WaitGroup); + allocated_wait_group.?.init(); + break :brk allocated_wait_group.?; + }; + const WaitContext = struct { + wait_group: *WaitGroup = undefined, + ctx: Context, + values: 
ValuesType, + }; + + const RunnerTask = struct { + task: Task, + ctx: *WaitContext, + i: usize = 0, + + pub fn call(task: *Task) void { + var runner_task = @fieldParentPtr(@This(), "task", task); + const i = runner_task.i; + if (comptime as_ptr) { + Function(runner_task.ctx.ctx, &runner_task.ctx.values[i], i); + } else { + Function(runner_task.ctx.ctx, runner_task.ctx.values[i], i); + } + + runner_task.ctx.wait_group.finish(); + } + }; + var wait_context = allocator.create(WaitContext) catch unreachable; + wait_context.* = .{ + .ctx = ctx, + .wait_group = wait_group, + .values = values, + }; + defer allocator.destroy(wait_context); + var tasks = allocator.alloc(RunnerTask, values.len) catch unreachable; + defer allocator.free(tasks); + var batch: Batch = undefined; + var offset = tasks.len - 1; + + { + tasks[0] = .{ + .i = offset, + .task = .{ .callback = RunnerTask.call }, + .ctx = wait_context, + }; + batch = Batch.from(&tasks[0].task); + } + if (tasks.len > 1) { + for (tasks[1..]) |*runner_task| { + offset -= 1; + runner_task.* = .{ + .i = offset, + .task = .{ .callback = RunnerTask.call }, + .ctx = wait_context, + }; + batch.push(Batch.from(&runner_task.task)); + } + } + + wait_group.counter += @as(u32, @intCast(values.len)); + this.schedule(batch); + wait_group.wait(); + } + + /// Schedule a batch of tasks to be executed by some thread on the thread pool. 
+    pub fn schedule(self: *ThreadPool, batch: Batch) void {
+        // An empty batch has no head/tail to unwrap below, so bail out first.
+        if (batch.len == 0) return;
+
+        // View the batch as a linked list of the Nodes embedded in its Tasks.
+        var nodes = Node.List{
+            .head = &batch.head.?.node,
+            .tail = &batch.tail.?.node,
+        };
+
+        if (Thread.current) |worker| {
+            // Called from a pool thread: try its bounded local buffer first and
+            // hand whatever list push() leaves behind on error to that thread's run queue.
+            worker.run_buffer.push(&nodes) catch worker.run_queue.push(nodes);
+        } else {
+            // No current pool thread is set: use the pool-wide run queue.
+            self.run_queue.push(nodes);
+        }
+
+        self.forceSpawn();
+    }
+
+    /// Post a non-waking notification to the pool (see `notify`).
+    pub fn forceSpawn(self: *ThreadPool) void {
+        self.notify(false);
+    }
+
+    /// Notify the pool, skipping `notifySlow()` when a non-waking caller
+    /// observes that a notification is already posted.
+    inline fn notify(self: *ThreadPool, is_waking: bool) void {
+        // Fast path: only non-waking callers may return early; a waking caller
+        // always has to go through notifySlow() to update the state.
+        // `and` short-circuits, so the atomic load is skipped when waking.
+        if (!is_waking and @as(Sync, @bitCast(self.sync.load(.Monotonic))).notified) return;
+
+        self.notifySlow(is_waking);
+    }
+
+    /// Warm the thread pool up to the given number of threads.
+ /// https://www.youtube.com/watch?v=ys3qcbO5KWw + pub fn warm(self: *ThreadPool, count: u14) void { + var sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + if (sync.spawned >= count) + return; + + const to_spawn = @min(count - sync.spawned, @as(u14, @truncate(self.max_threads))); + while (sync.spawned < to_spawn) { + var new_sync = sync; + new_sync.spawned += 1; + sync = @as(Sync, @bitCast(self.sync.tryCompareAndSwap( + @as(u32, @bitCast(sync)), + @as(u32, @bitCast(new_sync)), + .Release, + .Monotonic, + ) orelse break)); + const spawn_config = if (builtin.os.tag.isDarwin()) + // stack size must be a multiple of page_size + // macOS will fail to spawn a thread if the stack size is not a multiple of page_size + std.Thread.SpawnConfig{ .stack_size = ((std.Thread.SpawnConfig{}).stack_size + (std.mem.page_size / 2) / std.mem.page_size) * std.mem.page_size } + else + std.Thread.SpawnConfig{}; + + const thread = std.Thread.spawn(spawn_config, Thread.run, .{self}) catch return self.unregister(null); + thread.detach(); + } + } + + noinline fn notifySlow(self: *ThreadPool, is_waking: bool) void { + var sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + while (sync.state != .shutdown) { + const can_wake = is_waking or (sync.state == .pending); + if (is_waking) { + assert(sync.state == .waking); + } + + var new_sync = sync; + new_sync.notified = true; + if (can_wake and sync.idle > 0) { // wake up an idle thread + new_sync.state = .signaled; + } else if (can_wake and sync.spawned < self.max_threads) { // spawn a new thread + new_sync.state = .signaled; + new_sync.spawned += 1; + } else if (is_waking) { // no other thread to pass on "waking" status + new_sync.state = .pending; + } else if (sync.notified) { // nothing to update + return; + } + + // Release barrier synchronizes with Acquire in wait() + // to ensure pushes to run queues happen before observing a posted notification. 
+ sync = @as(Sync, @bitCast(self.sync.tryCompareAndSwap( + @as(u32, @bitCast(sync)), + @as(u32, @bitCast(new_sync)), + .Release, + .Monotonic, + ) orelse { + // We signaled to notify an idle thread + if (can_wake and sync.idle > 0) { + return self.idle_event.notify(); + } + + // We signaled to spawn a new thread + if (can_wake and sync.spawned < self.max_threads) { + const spawn_config = if (builtin.os.tag.isDarwin()) + // stack size must be a multiple of page_size + // macOS will fail to spawn a thread if the stack size is not a multiple of page_size + std.Thread.SpawnConfig{ .stack_size = ((std.Thread.SpawnConfig{}).stack_size + (std.mem.page_size / 2) / std.mem.page_size) * std.mem.page_size } + else + std.Thread.SpawnConfig{}; + + const thread = std.Thread.spawn(spawn_config, Thread.run, .{self}) catch return self.unregister(null); + // if (self.name.len > 0) thread.setName(self.name) catch {}; + return thread.detach(); + } + + return; + })); + } + } + + noinline fn wait(self: *ThreadPool, _is_waking: bool) error{Shutdown}!bool { + var is_idle = false; + var is_waking = _is_waking; + var sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + + while (true) { + if (sync.state == .shutdown) return error.Shutdown; + if (is_waking) assert(sync.state == .waking); + + // Consume a notification made by notify(). + if (sync.notified) { + var new_sync = sync; + new_sync.notified = false; + if (is_idle) + new_sync.idle -= 1; + if (sync.state == .signaled) + new_sync.state = .waking; + + // Acquire barrier synchronizes with notify() + // to ensure that pushes to run queue are observed after wait() returns. 
+ sync = @as(Sync, @bitCast(self.sync.tryCompareAndSwap( + @as(u32, @bitCast(sync)), + @as(u32, @bitCast(new_sync)), + .Acquire, + .Monotonic, + ) orelse { + return is_waking or (sync.state == .signaled); + })); + } else if (!is_idle) { + var new_sync = sync; + new_sync.idle += 1; + if (is_waking) + new_sync.state = .pending; + + sync = @as(Sync, @bitCast(self.sync.tryCompareAndSwap( + @as(u32, @bitCast(sync)), + @as(u32, @bitCast(new_sync)), + .Monotonic, + .Monotonic, + ) orelse { + is_waking = false; + is_idle = true; + continue; + })); + } else { + if (Thread.current) |current| { + current.drainIdleEvents(); + } + + self.idle_event.wait(); + sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + } + } + } + + /// Marks the thread pool as shutdown + pub noinline fn shutdown(self: *ThreadPool) void { + var sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + while (sync.state != .shutdown) { + var new_sync = sync; + new_sync.notified = true; + new_sync.state = .shutdown; + new_sync.idle = 0; + + // Full barrier to synchronize with both wait() and notify() + sync = @as(Sync, @bitCast(self.sync.tryCompareAndSwap( + @as(u32, @bitCast(sync)), + @as(u32, @bitCast(new_sync)), + .AcqRel, + .Monotonic, + ) orelse { + // Wake up any threads sleeping on the idle_event. + // TODO: I/O polling notification here. + if (sync.idle > 0) self.idle_event.shutdown(); + return; + })); + } + } + + fn register(noalias self: *ThreadPool, noalias thread: *Thread) void { + // Push the thread onto the threads stack in a lock-free manner. 
+ var threads = self.threads.load(.Monotonic); + while (true) { + thread.next = threads; + threads = self.threads.tryCompareAndSwap( + threads, + thread, + .Release, + .Monotonic, + ) orelse break; + } + } + + pub fn setThreadContext(noalias pool: *ThreadPool, ctx: ?*anyopaque) void { + pool.threadpool_context = ctx; + + var thread = pool.threads.load(.Monotonic) orelse return; + thread.ctx = pool.threadpool_context; + while (thread.next) |next| { + next.ctx = pool.threadpool_context; + thread = next; + } + } + + fn unregister(noalias self: *ThreadPool, noalias maybe_thread: ?*Thread) void { + // Un-spawn one thread, either due to a failed OS thread spawning or the thread is exiting. + const one_spawned = @as(u32, @bitCast(Sync{ .spawned = 1 })); + const sync = @as(Sync, @bitCast(self.sync.fetchSub(one_spawned, .Release))); + assert(sync.spawned > 0); + + // The last thread to exit must wake up the thread pool join()er + // who will start the chain to shutdown all the threads. + if (sync.state == .shutdown and sync.spawned == 1) { + self.join_event.notify(); + } + + // If this is a thread pool thread, wait for a shutdown signal by the thread pool join()er. + const thread = maybe_thread orelse return; + thread.join_event.wait(); + + // After receiving the shutdown signal, shutdown the next thread in the pool. + // We have to do that without touching the thread pool itself since it's memory is invalidated by now. + // So just follow our .next link. 
+ const next_thread = thread.next orelse return; + next_thread.join_event.notify(); + } + + fn join(self: *ThreadPool) void { + // Wait for the thread pool to be shutdown() then for all threads to enter a joinable state + var sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + if (!(sync.state == .shutdown and sync.spawned == 0)) { + self.join_event.wait(); + sync = @as(Sync, @bitCast(self.sync.load(.Monotonic))); + } + + assert(sync.state == .shutdown); + assert(sync.spawned == 0); + + // If there are threads, start off the chain sending it the shutdown signal. + // The thread receives the shutdown signal and sends it to the next thread, and the next.. + const thread = self.threads.load(.Acquire) orelse return; + thread.join_event.notify(); + } + + pub const Thread = struct { + next: ?*Thread = null, + target: ?*Thread = null, + join_event: Event = .{}, + run_queue: Node.Queue = .{}, + idle_queue: Node.Queue = .{}, + run_buffer: Node.Buffer = .{}, + ctx: ?*anyopaque = null, + + pub threadlocal var current: ?*Thread = null; + + pub fn pushIdleTask(self: *Thread, task: *Task) void { + const list = Node.List{ + .head = &task.node, + .tail = &task.node, + }; + self.idle_queue.push(list); + } + + /// Thread entry point which runs a worker for the ThreadPool + fn run(thread_pool: *ThreadPool) void { + var self_ = Thread{}; + var self = &self_; + current = self; + + if (thread_pool.on_thread_spawn) |spawn| { + current.?.ctx = spawn(thread_pool.threadpool_context); + } + + thread_pool.register(self); + + defer thread_pool.unregister(self); + + var is_waking = false; + while (true) { + is_waking = thread_pool.wait(is_waking) catch return; + + while (self.pop(thread_pool)) |result| { + if (result.pushed or is_waking) + thread_pool.notify(is_waking); + is_waking = false; + + const task = @fieldParentPtr(Task, "node", result.node); + (task.callback)(task); + } + + self.drainIdleEvents(); + } + } + + pub fn drainIdleEvents(noalias self: *Thread) void { + var consumer = 
self.idle_queue.tryAcquireConsumer() catch return; + defer self.idle_queue.releaseConsumer(consumer); + while (self.idle_queue.pop(&consumer)) |node| { + const task = @fieldParentPtr(Task, "node", node); + (task.callback)(task); + } + } + + /// Try to dequeue a Node/Task from the ThreadPool. + /// Spurious reports of dequeue() returning empty are allowed. + pub fn pop(noalias self: *Thread, noalias thread_pool: *ThreadPool) ?Node.Buffer.Stole { + // Check our local buffer first + if (self.run_buffer.pop()) |node| { + return Node.Buffer.Stole{ + .node = node, + .pushed = false, + }; + } + + // Then check our local queue + if (self.run_buffer.consume(&self.run_queue)) |stole| { + return stole; + } + + // Then the global queue + if (self.run_buffer.consume(&thread_pool.run_queue)) |stole| { + return stole; + } + + // Then try work stealing from other threads + var num_threads: u32 = @as(Sync, @bitCast(thread_pool.sync.load(.Monotonic))).spawned; + while (num_threads > 0) : (num_threads -= 1) { + // Traverse the stack of registered threads on the thread pool + const target = self.target orelse thread_pool.threads.load(.Acquire) orelse unreachable; + self.target = target.next; + + // Try to steal from their queue first to avoid contention (the target steal's from queue last). + if (self.run_buffer.consume(&target.run_queue)) |stole| { + return stole; + } + + // Skip stealing from the buffer if we're the target. + // We still steal from our own queue above given it may have just been locked the first time we tried. + if (target == self) { + continue; + } + + // Steal from the buffer of a remote thread as a last resort + if (self.run_buffer.steal(&target.run_buffer)) |stole| { + return stole; + } + } + + return null; + } + }; + + /// An event which stores 1 semaphore token and is multi-threaded safe. + /// The event can be shutdown(), waking up all wait()ing threads and + /// making subsequent wait()'s return immediately. 
+ const Event = struct { + state: Atomic(u32) = Atomic(u32).init(EMPTY), + + const EMPTY = 0; + const WAITING = 1; + pub const NOTIFIED = 2; + const SHUTDOWN = 3; + + /// Wait for and consume a notification + /// or wait for the event to be shutdown entirely + noinline fn wait(self: *Event) void { + var acquire_with: u32 = EMPTY; + var state = self.state.load(.Monotonic); + + while (true) { + // If we're shutdown then exit early. + // Acquire barrier to ensure operations before the shutdown() are seen after the wait(). + // Shutdown is rare so it's better to have an Acquire barrier here instead of on CAS failure + load which are common. + if (state == SHUTDOWN) { + std.atomic.fence(.Acquire); + return; + } + + // Consume a notification when it pops up. + // Acquire barrier to ensure operations before the notify() appear after the wait(). + if (state == NOTIFIED) { + state = self.state.tryCompareAndSwap( + state, + acquire_with, + .Acquire, + .Monotonic, + ) orelse return; + continue; + } + + // There is no notification to consume, we should wait on the event by ensuring its WAITING. + if (state != WAITING) blk: { + state = self.state.tryCompareAndSwap( + state, + WAITING, + .Monotonic, + .Monotonic, + ) orelse break :blk; + continue; + } + + // Wait on the event until a notify() or shutdown(). + // If we wake up to a notification, we must acquire it with WAITING instead of EMPTY + // since there may be other threads sleeping on the Futex who haven't been woken up yet. + // + // Acquiring to WAITING will make the next notify() or shutdown() wake a sleeping futex thread + // who will either exit on SHUTDOWN or acquire with WAITING again, ensuring all threads are awoken. + // This unfortunately results in the last notify() or shutdown() doing an extra futex wake but that's fine. 
+ Futex.wait(&self.state, WAITING); + state = self.state.load(.Monotonic); + acquire_with = WAITING; + } + } + + /// Wait for and consume a notification + /// or wait for the event to be shutdown entirely + noinline fn waitFor(self: *Event, timeout: usize) void { + _ = timeout; + var acquire_with: u32 = EMPTY; + var state = self.state.load(.Monotonic); + + while (true) { + // If we're shutdown then exit early. + // Acquire barrier to ensure operations before the shutdown() are seen after the wait(). + // Shutdown is rare so it's better to have an Acquire barrier here instead of on CAS failure + load which are common. + if (state == SHUTDOWN) { + std.atomic.fence(.Acquire); + return; + } + + // Consume a notification when it pops up. + // Acquire barrier to ensure operations before the notify() appear after the wait(). + if (state == NOTIFIED) { + state = self.state.tryCompareAndSwap( + state, + acquire_with, + .Acquire, + .Monotonic, + ) orelse return; + continue; + } + + // There is no notification to consume, we should wait on the event by ensuring its WAITING. + if (state != WAITING) blk: { + state = self.state.tryCompareAndSwap( + state, + WAITING, + .Monotonic, + .Monotonic, + ) orelse break :blk; + continue; + } + + // Wait on the event until a notify() or shutdown(). + // If we wake up to a notification, we must acquire it with WAITING instead of EMPTY + // since there may be other threads sleeping on the Futex who haven't been woken up yet. + // + // Acquiring to WAITING will make the next notify() or shutdown() wake a sleeping futex thread + // who will either exit on SHUTDOWN or acquire with WAITING again, ensuring all threads are awoken. + // This unfortunately results in the last notify() or shutdown() doing an extra futex wake but that's fine. 
+ Futex.wait(&self.state, WAITING); + state = self.state.load(.Monotonic); + acquire_with = WAITING; + } + } + + /// Post a notification to the event if it doesn't have one already + /// then wake up a waiting thread if there is one as well. + fn notify(self: *Event) void { + return self.wake(NOTIFIED, 1); + } + + /// Marks the event as shutdown, making all future wait()'s return immediately. + /// Then wakes up any threads currently waiting on the Event. + fn shutdown(self: *Event) void { + return self.wake(SHUTDOWN, std.math.maxInt(u32)); + } + + fn wake(self: *Event, release_with: u32, wake_threads: u32) void { + // Update the Event to notify it with the new `release_with` state (either NOTIFIED or SHUTDOWN). + // Release barrier to ensure any operations before this are this to happen before the wait() in the other threads. + const state = self.state.swap(release_with, .Release); + + // Only wake threads sleeping in futex if the state is WAITING. + // Avoids unnecessary wake ups. + if (state == WAITING) { + Futex.wake(&self.state, wake_threads); + } + } + }; + + /// Linked list intrusive memory node and lock-free data structures to operate with it + pub const Node = struct { + next: ?*Node = null, + + /// A linked list of Nodes + const List = struct { + head: *Node, + tail: *Node, + }; + + /// An unbounded multi-producer-(non blocking)-multi-consumer queue of Node pointers. + const Queue = struct { + stack: Atomic(usize) = Atomic(usize).init(0), + cache: ?*Node = null, + + const HAS_CACHE: usize = 0b01; + const IS_CONSUMING: usize = 0b10; + const PTR_MASK: usize = ~(HAS_CACHE | IS_CONSUMING); + + comptime { + assert(@alignOf(Node) >= ((IS_CONSUMING | HAS_CACHE) + 1)); + } + + fn push(noalias self: *Queue, list: List) void { + var stack = self.stack.load(.Monotonic); + while (true) { + // Attach the list to the stack (pt. 1) + list.tail.next = @as(?*Node, @ptrFromInt(stack & PTR_MASK)); + + // Update the stack with the list (pt. 2). 
+ // Don't change the HAS_CACHE and IS_CONSUMING bits of the consumer. + var new_stack = @intFromPtr(list.head); + assert(new_stack & ~PTR_MASK == 0); + new_stack |= (stack & ~PTR_MASK); + + // Push to the stack with a release barrier for the consumer to see the proper list links. + stack = self.stack.tryCompareAndSwap( + stack, + new_stack, + .Release, + .Monotonic, + ) orelse break; + } + } + + fn tryAcquireConsumer(self: *Queue) error{ Empty, Contended }!?*Node { + var stack = self.stack.load(.Monotonic); + while (true) { + if (stack & IS_CONSUMING != 0) + return error.Contended; // The queue already has a consumer. + if (stack & (HAS_CACHE | PTR_MASK) == 0) + return error.Empty; // The queue is empty when there's nothing cached and nothing in the stack. + + // When we acquire the consumer, also consume the pushed stack if the cache is empty. + var new_stack = stack | HAS_CACHE | IS_CONSUMING; + if (stack & HAS_CACHE == 0) { + assert(stack & PTR_MASK != 0); + new_stack &= ~PTR_MASK; + } + + // Acquire barrier on getting the consumer to see cache/Node updates done by previous consumers + // and to ensure our cache/Node updates in pop() happen after that of previous consumers. + stack = self.stack.tryCompareAndSwap( + stack, + new_stack, + .Acquire, + .Monotonic, + ) orelse return self.cache orelse @as(*Node, @ptrFromInt(stack & PTR_MASK)); + } + } + + fn releaseConsumer(noalias self: *Queue, noalias consumer: ?*Node) void { + // Stop consuming and remove the HAS_CACHE bit as well if the consumer's cache is empty. + // When HAS_CACHE bit is zeroed, the next consumer will acquire the pushed stack nodes. + var remove = IS_CONSUMING; + if (consumer == null) + remove |= HAS_CACHE; + + // Release the consumer with a release barrier to ensure cache/node accesses + // happen before the consumer was released and before the next consumer starts using the cache. 
+ self.cache = consumer; + const stack = self.stack.fetchSub(remove, .Release); + assert(stack & remove != 0); + } + + fn pop(noalias self: *Queue, noalias consumer_ref: *?*Node) ?*Node { + // Check the consumer cache (fast path) + if (consumer_ref.*) |node| { + consumer_ref.* = node.next; + return node; + } + + // Load the stack to see if there was anything pushed that we could grab. + var stack = self.stack.load(.Monotonic); + assert(stack & IS_CONSUMING != 0); + if (stack & PTR_MASK == 0) { + return null; + } + + // Nodes have been pushed to the stack, grab then with an Acquire barrier to see the Node links. + stack = self.stack.swap(HAS_CACHE | IS_CONSUMING, .Acquire); + assert(stack & IS_CONSUMING != 0); + assert(stack & PTR_MASK != 0); + + const node = @as(*Node, @ptrFromInt(stack & PTR_MASK)); + consumer_ref.* = node.next; + return node; + } + }; + + /// A bounded single-producer, multi-consumer ring buffer for node pointers. + const Buffer = struct { + head: Atomic(Index) = Atomic(Index).init(0), + tail: Atomic(Index) = Atomic(Index).init(0), + array: [capacity]Atomic(*Node) = undefined, + + const Index = u32; + const capacity = 256; // Appears to be a pretty good trade-off in space vs contended throughput + comptime { + assert(std.math.maxInt(Index) >= capacity); + assert(std.math.isPowerOfTwo(capacity)); + } + + fn push(noalias self: *Buffer, noalias list: *List) error{Overflow}!void { + var head = self.head.load(.Monotonic); + var tail = self.tail.loadUnchecked(); // we're the only thread that can change this + + while (true) { + var size = tail -% head; + assert(size <= capacity); + + // Push nodes from the list to the buffer if it's not empty.. + if (size < capacity) { + var nodes: ?*Node = list.head; + while (size < capacity) : (size += 1) { + const node = nodes orelse break; + nodes = node.next; + + // Array written atomically with weakest ordering since it could be getting atomically read by steal(). 
+ self.array[tail % capacity].store(node, .Unordered); + tail +%= 1; + } + + // Release barrier synchronizes with Acquire loads for steal()ers to see the array writes. + self.tail.store(tail, .Release); + + // Update the list with the nodes we pushed to the buffer and try again if there's more. + list.head = nodes orelse return; + std.atomic.spinLoopHint(); + head = self.head.load(.Monotonic); + continue; + } + + // Try to steal/overflow half of the tasks in the buffer to make room for future push()es. + // Migrating half amortizes the cost of stealing while requiring future pops to still use the buffer. + // Acquire barrier to ensure the linked list creation after the steal only happens after we successfully steal. + var migrate = size / 2; + head = self.head.tryCompareAndSwap( + head, + head +% migrate, + .Acquire, + .Monotonic, + ) orelse { + // Link the migrated Nodes together + const first = self.array[head % capacity].loadUnchecked(); + while (migrate > 0) : (migrate -= 1) { + const prev = self.array[head % capacity].loadUnchecked(); + head +%= 1; + prev.next = self.array[head % capacity].loadUnchecked(); + } + + // Append the list that was supposed to be pushed to the end of the migrated Nodes + const last = self.array[(head -% 1) % capacity].loadUnchecked(); + last.next = list.head; + list.tail.next = null; + + // Return the migrated nodes + the original list as overflowed + list.head = first; + return error.Overflow; + }; + } + } + + fn pop(self: *Buffer) ?*Node { + var head = self.head.load(.Monotonic); + var tail = self.tail.loadUnchecked(); // we're the only thread that can change this + + while (true) { + // Quick sanity check and return null when not empty + var size = tail -% head; + assert(size <= capacity); + if (size == 0) { + return null; + } + + // Dequeue with an acquire barrier to ensure any writes done to the Node + // only happens after we successfully claim it from the array. 
+ head = self.head.tryCompareAndSwap( + head, + head +% 1, + .Acquire, + .Monotonic, + ) orelse return self.array[head % capacity].loadUnchecked(); + } + } + + const Stole = struct { + node: *Node, + pushed: bool, + }; + + fn consume(noalias self: *Buffer, noalias queue: *Queue) ?Stole { + var consumer = queue.tryAcquireConsumer() catch return null; + defer queue.releaseConsumer(consumer); + + const head = self.head.load(.Monotonic); + const tail = self.tail.loadUnchecked(); // we're the only thread that can change this + + const size = tail -% head; + assert(size <= capacity); + assert(size == 0); // we should only be consuming if our array is empty + + // Pop nodes from the queue and push them to our array. + // Atomic stores to the array as steal() threads may be atomically reading from it. + var pushed: Index = 0; + while (pushed < capacity) : (pushed += 1) { + const node = queue.pop(&consumer) orelse break; + self.array[(tail +% pushed) % capacity].store(node, .Unordered); + } + + // We will be returning one node that we stole from the queue. + // Get an extra, and if that's not possible, take one from our array. + const node = queue.pop(&consumer) orelse blk: { + if (pushed == 0) return null; + pushed -= 1; + break :blk self.array[(tail +% pushed) % capacity].loadUnchecked(); + }; + + // Update the array tail with the nodes we pushed to it. + // Release barrier to synchronize with Acquire barrier in steal()'s to see the written array Nodes. 
+ if (pushed > 0) self.tail.store(tail +% pushed, .Release); + return Stole{ + .node = node, + .pushed = pushed > 0, + }; + } + + fn steal(noalias self: *Buffer, noalias buffer: *Buffer) ?Stole { + const head = self.head.load(.Monotonic); + const tail = self.tail.loadUnchecked(); // we're the only thread that can change this + + const size = tail -% head; + assert(size <= capacity); + assert(size == 0); // we should only be stealing if our array is empty + + while (true) : (std.atomic.spinLoopHint()) { + const buffer_head = buffer.head.load(.Acquire); + const buffer_tail = buffer.tail.load(.Acquire); + + // Overly large size indicates the the tail was updated a lot after the head was loaded. + // Reload both and try again. + const buffer_size = buffer_tail -% buffer_head; + if (buffer_size > capacity) { + continue; + } + + // Try to steal half (divCeil) to amortize the cost of stealing from other threads. + const steal_size = buffer_size - (buffer_size / 2); + if (steal_size == 0) { + return null; + } + + // Copy the nodes we will steal from the target's array to our own. + // Atomically load from the target buffer array as it may be pushing and atomically storing to it. + // Atomic store to our array as other steal() threads may be atomically loading from it as above. + var i: Index = 0; + while (i < steal_size) : (i += 1) { + const node = buffer.array[(buffer_head +% i) % capacity].load(.Unordered); + self.array[(tail +% i) % capacity].store(node, .Unordered); + } + + // Try to commit the steal from the target buffer using: + // - an Acquire barrier to ensure that we only interact with the stolen Nodes after the steal was committed. + // - a Release barrier to ensure that the Nodes are copied above prior to the committing of the steal + // because if they're copied after the steal, the could be getting rewritten by the target's push(). 
+ _ = buffer.head.compareAndSwap( + buffer_head, + buffer_head +% steal_size, + .AcqRel, + .Monotonic, + ) orelse { + // Pop one from the nodes we stole as we'll be returning it + const pushed = steal_size - 1; + const node = self.array[(tail +% pushed) % capacity].loadUnchecked(); + + // Update the array tail with the nodes we pushed to it. + // Release barrier to synchronize with Acquire barrier in steal()'s to see the written array Nodes. + if (pushed > 0) self.tail.store(tail +% pushed, .Release); + return Stole{ + .node = node, + .pushed = pushed > 0, + }; + }; + } + } + }; + }; +}; + +test "parallel for loop" { + var thread_pool = ThreadPool.init(.{ .max_threads = 12 }); + var sleepy_time: u32 = 100; + var huge_array = &[_]u32{ + sleepy_time + std.rand.DefaultPrng.init(1).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(2).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(3).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(4).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(5).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(6).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(7).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(8).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(9).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(10).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(11).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(12).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(13).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(14).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(15).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(16).random().uintAtMost(u32, 20), + sleepy_time + 
std.rand.DefaultPrng.init(17).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(18).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(19).random().uintAtMost(u32, 20), + sleepy_time + std.rand.DefaultPrng.init(20).random().uintAtMost(u32, 20), + }; + const Runner = struct { + completed: usize = 0, + total: usize = 0, + pub fn run(ctx: *@This(), value: u32, _: usize) void { + std.time.sleep(value); + ctx.completed += 1; + std.debug.assert(ctx.completed <= ctx.total); + } + }; + var runny = try std.heap.page_allocator.create(Runner); + runny.* = .{ .total = huge_array.len }; + try thread_pool.doAndWait(std.heap.page_allocator, null, runny, Runner.run, std.mem.span(huge_array)); + try std.testing.expectEqual(huge_array.len, runny.completed); +} + +pub fn NewWorkPool(comptime max_threads: ?usize) type { + return struct { + var pool: ThreadPool = undefined; + var loaded: bool = false; + + fn create() *ThreadPool { + @setCold(true); + + pool = ThreadPool.init(.{ + .max_threads = max_threads orelse @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), + .stack_size = 2 * 1024 * 1024, + }); + return &pool; + } + + pub fn deinit() void { + get().deinit(); + } + + pub inline fn get() *ThreadPool { + // lil racy + if (loaded) return &pool; + loaded = true; + + return create(); + } + + pub fn scheduleBatch(batch: ThreadPool.Batch) void { + get().schedule(batch); + } + + pub fn scheduleTask(task: *ThreadPool.Task) void { + get().schedule(ThreadPool.Batch.from(task)); + } + + pub fn go(allocator: std.mem.Allocator, comptime Context: type, context: Context, comptime function: *const fn (Context) void) !void { + const TaskType = struct { + task: ThreadPool.Task, + context: Context, + allocator: std.mem.Allocator, + + pub fn callback(task: *ThreadPool.Task) void { + var this_task = @fieldParentPtr(@This(), "task", task); + function(this_task.context); + this_task.allocator.destroy(this_task); + } + }; + + var task_ = try 
allocator.create(TaskType); + task_.* = .{ + .task = .{ .callback = TaskType.callback }, + .context = context, + .allocator = allocator, + }; + scheduleTask(&task_.task); + } + }; +} + +pub const WorkPool = NewWorkPool(null); +const testing = std.testing; + +const CrdsTableTrimContext = struct { + index: usize, + max_trim: usize, + self: *CrdsTable, +}; + +const CrdsTable = struct { + pub fn trim(context: CrdsTableTrimContext) void { + const self = context.self; + _ = self; + const max_trim = context.max_trim; + _ = max_trim; + const index = context.index; + _ = index; + + std.debug.print("I ran!\n\n", .{}); + // todo + + } +}; + +test "sync.thread_pool: workpool works" { + var crds: CrdsTable = CrdsTable{}; + var a = CrdsTableTrimContext{ .index = 1, .max_trim = 2, .self = &crds }; + defer WorkPool.deinit(); + try WorkPool.go(testing.allocator, CrdsTableTrimContext, a, CrdsTable.trim); + + std.time.sleep(std.time.ns_per_s * 1); + WorkPool.pool.shutdown(); +} \ No newline at end of file From 7d142a00ba1ed4115ff49b740e4fb3f176b89db3 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 20 Sep 2023 19:56:12 -0400 Subject: [PATCH 07/72] parallel processing pull requests working --- src/benchmarks.zig | 10 +- src/gossip/active_set.zig | 6 +- src/gossip/fuzz.zig | 21 +- src/gossip/gossip_service.zig | 468 ++++++++++++++++++++++++++-------- src/gossip/pull_response.zig | 180 +------------ src/lib.zig | 1 + src/net/echo.zig | 10 +- src/net/net.zig | 2 +- src/sync/thread_pool.zig | 267 +++++++++---------- 9 files changed, 527 insertions(+), 438 deletions(-) diff --git a/src/benchmarks.zig b/src/benchmarks.zig index 329add027..f1edafb59 100644 --- a/src/benchmarks.zig +++ b/src/benchmarks.zig @@ -41,7 +41,13 @@ pub fn main() !void { if (std.mem.startsWith(u8, "gossip", filter)) { try benchmark( - @import("gossip/gossip_service.zig").BenchmarkMessageProcessing, + @import("gossip/gossip_service.zig").BenchmarkGossipServiceGeneral, + max_time_per_bench, + 
TimeUnits.milliseconds, + ); + + try benchmark( + @import("gossip/gossip_service.zig").BenchmarkGossipServicePullRequest, max_time_per_bench, TimeUnits.milliseconds, ); @@ -156,7 +162,7 @@ pub fn benchmark( var timer = try time.Timer.start(); inline for (functions, 0..) |def, fcni| { - if (fcni > 0) + if (fcni > 0) std.debug.print("---\n", .{}); inline for (args, 0..) |arg, index| { diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index a3234350c..fcda5fa37 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -10,7 +10,7 @@ const CrdsValue = crds.CrdsValue; const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Pubkey = @import("../core/pubkey.zig").Pubkey; -const get_wallclock_ms = @import("../gossip/crds.zig").getWallclockMs; +const getWallclockMs = @import("../gossip/crds.zig").getWallclockMs; const _crds_table = @import("../gossip/crds_table.zig"); const CrdsTable = _crds_table.CrdsTable; @@ -66,7 +66,7 @@ pub const ActiveSet = struct { return; } const size = @min(crds_peers.len, NUM_ACTIVE_SET_ENTRIES); - var rng = std.rand.DefaultPrng.init(get_wallclock_ms()); + var rng = std.rand.DefaultPrng.init(getWallclockMs()); pull_request.shuffleFirstN(rng.random(), crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); @@ -151,7 +151,7 @@ test "gossip.active_set: init/deinit" { var value = try CrdsValue.initSigned(crds.CrdsData{ .LegacyContactInfo = data, }, &keypair); - try crds_table.insert(value, get_wallclock_ms()); + try crds_table.insert(value, getWallclockMs()); } var active_set = ActiveSet.init(alloc); diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 709b79a50..476af3fc0 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -19,7 +19,7 @@ const AtomicBool = std.atomic.Atomic(bool); const SocketAddr = @import("../net/net.zig").SocketAddr; const Pubkey = @import("../core/pubkey.zig").Pubkey; -const get_wallclock_ms = 
@import("crds.zig").getWallclockMs; +const getWallclockMs = @import("crds.zig").getWallclockMs; const Bloom = @import("../bloom/bloom.zig").Bloom; const network = @import("zig-network"); @@ -126,7 +126,7 @@ pub fn randomPushMessage(rng: std.rand.Random, keypair: *const KeyPair, to_addr: &to_addr, ChunkType.PushMessage, ); - return packets; + return packets.?; } pub fn randomPullResponse(rng: std.rand.Random, keypair: *const KeyPair, to_addr: EndPoint) !std.ArrayList(Packet) { @@ -146,7 +146,7 @@ pub fn randomPullResponse(rng: std.rand.Random, keypair: *const KeyPair, to_addr &to_addr, ChunkType.PullResponse, ); - return packets; + return packets.?; } pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, keypair: *const KeyPair, to_addr: EndPoint) !Packet { @@ -197,7 +197,7 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key filter.mask = filters.items[0].mask; filter.mask_bits = filters.items[0].mask_bits; - for (filters.items[1..]) |*filter_i| { + for (filters.items[1..]) |*filter_i| { filter_i.filter.deinit(); } filters.deinit(); @@ -209,7 +209,7 @@ pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, key var msg_slice = try bincode.writeToSlice(&packet_buf, msg, bincode.Params{}); var packet = Packet.init(to_addr, packet_buf, msg_slice.len); - if (!invalid_filter) { + if (!invalid_filter) { filter.filter.deinit(); } @@ -254,7 +254,7 @@ pub fn main() !void { if (maybe_seed) |seed_str| { break :blk try std.fmt.parseInt(u64, seed_str, 10); } else { - break :blk get_wallclock_ms(); + break :blk getWallclockMs(); } }; @@ -292,17 +292,16 @@ pub fn main() !void { .noop, ); - var fuzz_handle = try std.Thread.spawn( - .{}, GossipService.runSpy, .{ &gossip_service_fuzzer }); + var fuzz_handle = try std.Thread.spawn(.{}, GossipService.runSpy, .{&gossip_service_fuzzer}); - // std.debug.print("setting up", .{}); - // while (true) { + // std.debug.print("setting up", .{}); + // while (true) { // 
var lg = gossip_service_fuzzer.crds_table_rw.read(); // var table: *const CrdsTable = lg.get(); // var n_contacts = table.contact_infos.iterator().len; // lg.unlock(); - // if (n_contacts > 0) { + // if (n_contacts > 0) { // break; // } // std.debug.print(".", .{}); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 5fddc7f4c..af839af7b 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -5,6 +5,10 @@ const EndPoint = network.EndPoint; const Packet = @import("packet.zig").Packet; const PACKET_DATA_SIZE = @import("packet.zig").PACKET_DATA_SIZE; +const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; +const Task = ThreadPool.Task; +const Batch = ThreadPool.Batch; + const Thread = std.Thread; const AtomicBool = std.atomic.Atomic(bool); const UdpSocket = network.Socket; @@ -25,7 +29,7 @@ const CrdsValue = crds.CrdsValue; const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Pubkey = @import("../core/pubkey.zig").Pubkey; -const get_wallclock_ms = @import("../gossip/crds.zig").getWallclockMs; +const getWallclockMs = @import("../gossip/crds.zig").getWallclockMs; const _crds_table = @import("../gossip/crds_table.zig"); const CrdsTable = _crds_table.CrdsTable; @@ -73,6 +77,8 @@ pub const GOSSIP_PING_CACHE_CAPACITY: usize = 65536; pub const GOSSIP_PING_CACHE_TTL_NS: u64 = std.time.ns_per_s * 1280; pub const GOSSIP_PING_CACHE_RATE_LIMIT_DELAY_NS: u64 = std.time.ns_per_s * (1280 / 64); +pub const MAX_NUM_CRDS_VALUES_PULL_RESPONSE = 20; // TODO: this is approx the rust one -- should tune + /// Maximum number of origin nodes that a PruneData may contain, such that the /// serialized size of the PruneMessage stays below PACKET_DATA_SIZE. 
pub const MAX_PRUNE_DATA_NODES: usize = 32; @@ -107,6 +113,7 @@ pub const GossipService = struct { ping_cache_rw: RwMux(PingCache), echo_server: echo.Server, logger: Logger, + thread_pool: *ThreadPool, // used for benchmarking messages_processed: std.atomic.Atomic(usize) = std.atomic.Atomic(usize).init(0), @@ -146,7 +153,13 @@ pub const GossipService = struct { var failed_pull_hashes = HashTimeQueue.init(allocator); var push_msg_q = std.ArrayList(CrdsValue).init(allocator); - var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger); + var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); + + var thread_pool = try allocator.create(ThreadPool); + thread_pool.* = ThreadPool.init(.{ + .max_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), + .stack_size = 2 * 1024 * 1024, + }); return Self{ .my_contact_info = my_contact_info, @@ -174,6 +187,7 @@ pub const GossipService = struct { ), .echo_server = echo_server, .logger = logger, + .thread_pool = thread_pool, }; } @@ -198,6 +212,8 @@ pub const GossipService = struct { self.entrypoints.deinit(); + self.allocator.destroy(self.thread_pool); + deinitRwMux(&self.crds_table_rw); deinitRwMux(&self.active_set_rw); deinitRwMux(&self.ping_cache_rw); @@ -328,10 +344,20 @@ pub const GossipService = struct { self.logger.debugf("verify_packets loop closed\n", .{}); } + pub const PullRequestMessage = struct { + filter: CrdsFilter, + value: CrdsValue, + from_endpoint: EndPoint, + }; + /// main logic for recieving and processing `Protocol` messages. 
pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; var msg_count: usize = 0; + + var pull_requests = try std.ArrayList(PullRequestMessage).initCapacity(self.allocator, 100); + defer pull_requests.deinit(); + while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { @@ -347,14 +373,13 @@ pub const GossipService = struct { defer self.verified_incoming_channel.allocator.free(protocol_messages); msg_count += protocol_messages.len; - for (protocol_messages) |protocol_message| { - var message: Protocol = protocol_message.message; + for (protocol_messages) |*protocol_message| { var from_endpoint: EndPoint = protocol_message.from_endpoint; - switch (message) { + switch (protocol_message.message) { .PushMessage => |*push| { // var x_timer = std.time.Timer.start() catch unreachable; - // defer { + // defer { // const elapsed = x_timer.read(); // std.debug.print("push_message took {}ns\n", .{elapsed}); // } @@ -394,7 +419,7 @@ pub const GossipService = struct { }, .PullResponse => |*pull| { // var x_timer = std.time.Timer.start() catch unreachable; - // defer { + // defer { // const elapsed = x_timer.read(); // std.debug.print("pull_response took {}ns\n", .{elapsed}); // } @@ -419,14 +444,13 @@ pub const GossipService = struct { }, .PullRequest => |*pull| { // var x_timer = std.time.Timer.start() catch unreachable; - // defer { + // defer { // const elapsed = x_timer.read(); // std.debug.print("pull_request took {}ns\n", .{elapsed}); // } - var pull_filter: CrdsFilter = pull[0]; + // var pull_filter: CrdsFilter = pull[0]; var pull_value: CrdsValue = pull[1]; // contact info - switch (pull_value.data) { .LegacyContactInfo => |*info| { if (info.id.equals(&self.my_pubkey)) { @@ -438,40 +462,46 @@ pub const GossipService = struct { else => continue, } - var endpoint_buf = std.ArrayList(u8).init(self.allocator); - try 
from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); - defer endpoint_buf.deinit(); - - var pull_log_entry = self.logger - .field("from_endpoint", endpoint_buf.items) - .field("from_pubkey", &pull_value.id().string()); - - var maybe_packets = self.handlePullRequest( - pull_value, - pull_filter, - from_endpoint, - pull_log_entry, - ) catch |err| { - pull_log_entry.field("error", @errorName(err)) - .err("error handling pull request"); - continue; - }; - - if (maybe_packets == null) { - pull_log_entry.field("num_packets_resp", 0) - .info("received pull request"); - continue; - } + try pull_requests.append(.{ + .filter = pull[0], + .value = pull[1], + .from_endpoint = from_endpoint, + }); + + // var endpoint_buf = std.ArrayList(u8).init(self.allocator); + // try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); + // defer endpoint_buf.deinit(); + + // var pull_log_entry = self.logger + // .field("from_endpoint", endpoint_buf.items) + // .field("from_pubkey", &pull_value.id().string()); + + // var maybe_packets = self.handlePullRequest( + // pull_value, + // pull_filter, + // from_endpoint, + // pull_log_entry, + // ) catch |err| { + // pull_log_entry.field("error", @errorName(err)) + // .err("error handling pull request"); + // continue; + // }; + + // if (maybe_packets == null) { + // pull_log_entry.field("num_packets_resp", 0) + // .info("received pull request"); + // continue; + // } - var packets = maybe_packets.?; - defer packets.deinit(); + // var packets = maybe_packets.?; + // defer packets.deinit(); - pull_log_entry.field("num_packets_resp", packets.items.len) - .info("received pull request"); + // pull_log_entry.field("num_packets_resp", packets.items.len) + // .info("received pull request"); - for (packets.items) |packet| { - try self.packet_outgoing_channel.send(packet); - } + // for (packets.items) |packet| { + // try self.packet_outgoing_channel.send(packet); + // } }, .PruneMessage => |*prune| { 
const prune_msg: PruneData = prune[1]; @@ -528,7 +558,7 @@ pub const GossipService = struct { var ping_cache: *PingCache = ping_cache_lock.mut(); const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); - _ = ping_cache.receviedPong(pong, SocketAddr.fromEndpoint(from_endpoint), now); + _ = ping_cache.receviedPong(pong, SocketAddr.fromEndpoint(&from_endpoint), now); } self.logger @@ -539,9 +569,16 @@ pub const GossipService = struct { } } + // handle batch messages + self.handleBatchPullRequest(pull_requests); + for (pull_requests.items) |*pr| { + pr.filter.deinit(); + } + pull_requests.clearRetainingCapacity(); + { // var table_timer = std.time.Timer.start() catch unreachable; - // defer { + // defer { // const elapsed = table_timer.read(); // std.debug.print("crds table trim took {}ns\n", .{elapsed}); // } @@ -554,8 +591,8 @@ pub const GossipService = struct { }; } - // const elapsed = timer.read(); - // std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + const elapsed = timer.read(); + std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); self.messages_processed.store(msg_count, std.atomic.Ordering.Unordered); } @@ -573,7 +610,7 @@ pub const GossipService = struct { var should_send_pull_requests = true; while (!self.exit.load(std.atomic.Ordering.Unordered)) { - const top_of_loop_ts = get_wallclock_ms(); + const top_of_loop_ts = getWallclockMs(); // TODO: send ping messages based on PingCache @@ -596,7 +633,7 @@ pub const GossipService = struct { should_send_pull_requests = !should_send_pull_requests; // new push msgs - self.drainPushQueueToCrdsTable(get_wallclock_ms()); + self.drainPushQueueToCrdsTable(getWallclockMs()); var maybe_push_packets = self.buildPushMessages(&push_cursor) catch |e| blk: { self.logger.debugf("failed to generate push messages: {any}\n", .{e}); break :blk null; @@ -609,12 +646,12 @@ pub const GossipService = struct { } // trim data - 
self.trimMemory(get_wallclock_ms()) catch @panic("out of memory"); + self.trimMemory(getWallclockMs()) catch @panic("out of memory"); // periodic things if (top_of_loop_ts - last_push_ts > CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS / 2) { // update wallclock and sign - self.my_contact_info.wallclock = get_wallclock_ms(); + self.my_contact_info.wallclock = getWallclockMs(); var my_contact_info_value = try crds.CrdsValue.initSigned(crds.CrdsData{ .LegacyContactInfo = self.my_contact_info, }, &self.my_keypair); @@ -630,11 +667,11 @@ pub const GossipService = struct { self.rotateActiveSet() catch @panic("out of memory"); - last_push_ts = get_wallclock_ms(); + last_push_ts = getWallclockMs(); } // sleep - const elapsed_ts = get_wallclock_ms() - top_of_loop_ts; + const elapsed_ts = getWallclockMs() - top_of_loop_ts; if (elapsed_ts < GOSSIP_SLEEP_MILLIS) { const time_left_ms = GOSSIP_SLEEP_MILLIS - elapsed_ts; std.time.sleep(time_left_ms * std.time.ns_per_ms); @@ -646,7 +683,7 @@ pub const GossipService = struct { pub fn rotateActiveSet( self: *Self, ) error{ OutOfMemory, SerializationError, ChannelClosed }!void { - const now = get_wallclock_ms(); + const now = getWallclockMs(); var buf: [NUM_ACTIVE_SET_ENTRIES]crds.LegacyContactInfo = undefined; var gossip_peers = self.getGossipNodes(&buf, NUM_ACTIVE_SET_ENTRIES, now); @@ -692,7 +729,7 @@ pub const GossipService = struct { return null; } - const now = get_wallclock_ms(); + const now = getWallclockMs(); var total_byte_size: usize = 0; // find new values in crds table @@ -769,16 +806,17 @@ pub const GossipService = struct { const to_endpoint: *const EndPoint = push_entry.key_ptr; // send the values as a pull response - var endpoint_packets = try crdsValuesToPackets( + var maybe_endpoint_packets = try crdsValuesToPackets( self.allocator, &self.my_pubkey, crds_values.items, to_endpoint, ChunkType.PushMessage, ); - defer endpoint_packets.deinit(); - - try packets.appendSlice(endpoint_packets.items); + if (maybe_endpoint_packets) 
|endpoint_packets| { + defer endpoint_packets.deinit(); + try packets.appendSlice(endpoint_packets.items); + } } return packets; @@ -793,7 +831,7 @@ pub const GossipService = struct { ) !std.ArrayList(Packet) { // get nodes from crds table var buf: [MAX_NUM_PULL_REQUESTS]crds.LegacyContactInfo = undefined; - const now = get_wallclock_ms(); + const now = getWallclockMs(); var peers = self.getGossipNodes( &buf, MAX_NUM_PULL_REQUESTS, @@ -907,6 +945,194 @@ pub const GossipService = struct { return output; } + fn handleBatchPullRequest( + self: *Self, + pull_requests: std.ArrayList(PullRequestMessage), + ) void { + // self.handleBatchPullRequestSequential(pull_requests) catch {}; + self.handleBatchPullRequestParallel(pull_requests) catch |err| { + std.debug.print("handleBatchPullRequestParallel failed: {}\n", .{err}); + }; + } + + const PullRequestTask = struct { + task: Task, + allocator: std.mem.Allocator, + filter: CrdsFilter, + crds_table: *const CrdsTable, + output: std.ArrayList(CrdsValue), + done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + + pub fn callback(task: *Task) void { + var this = @fieldParentPtr(@This(), "task", task); + defer this.done.store(true, std.atomic.Ordering.Release); + + const response_crds_values = pull_response.filterCrdsValues( + this.allocator, + this.crds_table, + &this.filter, + crds.getWallclockMs(), + MAX_NUM_CRDS_VALUES_PULL_RESPONSE, + ) catch { + // std.debug.print("filterCrdsValues failed\n", .{}); + return; + }; + defer response_crds_values.deinit(); + + this.output.appendSlice(response_crds_values.items) catch { + // std.debug.print("append slice failed\n", .{}); + return; + }; + // std.debug.print("success: len = {}\n", .{ response_crds_values.items.len }); + } + }; + + fn handleBatchPullRequestParallel( + self: *Self, + pull_requests: std.ArrayList(PullRequestMessage), + ) !void { + // update the callers + const now = getWallclockMs(); + { + var crds_table_lock = self.crds_table_rw.write(); + defer 
crds_table_lock.unlock(); + var crds_table: *CrdsTable = crds_table_lock.mut(); + + for (pull_requests.items) |*req| { + const caller = req.value.id(); + crds_table.insert(req.value, now) catch {}; + crds_table.updateRecordTimestamp(caller, now); + } + } + + const n_requests = pull_requests.items.len; + var valid_indexs = try std.ArrayList(usize).initCapacity(self.allocator, n_requests); + defer valid_indexs.deinit(); + + { + var ping_cache_lock = self.ping_cache_rw.write(); + defer ping_cache_lock.unlock(); + var ping_cache: *PingCache = ping_cache_lock.mut(); + + var ping_buff = [_]u8{0} ** PACKET_DATA_SIZE; + + for (pull_requests.items, 0..) |req, i| { + // filter out valid peers and send ping messages to peers + var now_instant = std.time.Instant.now() catch @panic("time is not supported on this OS!"); + var puller_socket_addr = SocketAddr.fromEndpoint(&req.from_endpoint); + + const caller = req.value.id(); + var result = ping_cache.check( + now_instant, + .{ caller, puller_socket_addr }, + &self.my_keypair, + ); + + // send a ping + if (result.maybe_ping) |ping| { + var protocol_msg = Protocol{ .PingMessage = ping }; + var serialized_ping = bincode.writeToSlice(&ping_buff, protocol_msg, .{}) catch return error.SerializationError; + var packet = Packet.init(req.from_endpoint, ping_buff, serialized_ping.len); + try self.packet_outgoing_channel.send(packet); + } + + if (result.passes_ping_check) { + valid_indexs.appendAssumeCapacity(i); + } + } + } + + if (valid_indexs.items.len == 0) { + return; + } + + // create the pull requests + + const n_valid_requests = valid_indexs.items.len; + var tasks = try std.ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); + defer tasks.deinit(); + + { + var crds_table_lock = self.crds_table_rw.read(); + const crds_table: *const CrdsTable = crds_table_lock.get(); + defer crds_table_lock.unlock(); + + for (valid_indexs.items) |i| { + // create the thread task + var output = 
std.ArrayList(CrdsValue).init(self.allocator); + var task = PullRequestTask{ + .task = .{ .callback = PullRequestTask.callback }, + .filter = pull_requests.items[i].filter, + .crds_table = crds_table, + .output = output, + .allocator = self.allocator, + }; + + // alloc on heap + var task_heap = try self.allocator.create(PullRequestTask); + task_heap.* = task; + tasks.appendAssumeCapacity(task_heap); + + // run it + const batch = Batch.from(&task_heap.task); + ThreadPool.schedule(self.thread_pool, batch); + } + + // _ = pool; + // for (tasks.items) |task| { + // task.task.callback(&task.task); + // } + + // wait for them to be done to release the lock + for (tasks.items) |task| { + while (!task.done.load(std.atomic.Ordering.Acquire)) { + // wait + } + } + } + + for (tasks.items, valid_indexs.items) |task, message_i| { + const from_endpoint = pull_requests.items[message_i].from_endpoint; + defer { + task.output.deinit(); + self.allocator.destroy(task); + } + + const maybe_packets = try crdsValuesToPackets( + self.allocator, + &self.my_pubkey, + task.output.items, + &from_endpoint, + ChunkType.PullResponse, + ); + if (maybe_packets) |packets| { + defer packets.deinit(); + for (packets.items) |packet| { + try self.packet_outgoing_channel.send(packet); + } + } + } + } + + fn handleBatchPullRequestSequential( + self: *Self, + pull_requests: std.ArrayList(PullRequestMessage), + ) !void { + for (pull_requests.items) |*pr| { + const maybe_resp_packets = try self.handlePullRequest( + pr.value.*, + pr.filter.*, + pr.from_endpoint.*, + null, + ); + if (maybe_resp_packets) |*resp_packets| { + for (resp_packets.items) |packet| { + try self.packet_outgoing_channel.send(packet); + } + } + } + } + /// logic for handling a pull request message /// values which are missing in the pull request filter are returned as a pull response /// which are serialized into packets. 
@@ -921,11 +1147,11 @@ pub const GossipService = struct { // logging maybe_log_entry: ?Entry, ) error{ SerializationError, OutOfMemory, ChannelClosed }!?std.ArrayList(Packet) { - const now = get_wallclock_ms(); + const now = getWallclockMs(); { var x_timer = std.time.Timer.start() catch unreachable; - defer { + defer { const elapsed = x_timer.read(); std.debug.print("pull_request crds_table_insert took {}ns\n", .{elapsed}); } @@ -940,7 +1166,7 @@ pub const GossipService = struct { // filter out valid peers and send ping messages to peers var now_instant = std.time.Instant.now() catch @panic("time is not supported on this OS!"); - var puller_socket_addr = SocketAddr.fromEndpoint(pull_from_endpoint); + var puller_socket_addr = SocketAddr.fromEndpoint(&pull_from_endpoint); var ping_cache_lock = self.ping_cache_rw.write(); var ping_cache: *PingCache = ping_cache_lock.mut(); @@ -968,13 +1194,12 @@ pub const GossipService = struct { return null; } - const MAX_NUM_CRDS_VALUES_PULL_RESPONSE = 20; // TODO: this is approx the rust one -- should tune const crds_values = blk: { var crds_table_lock = self.crds_table_rw.read(); defer crds_table_lock.unlock(); var x_timer = std.time.Timer.start() catch unreachable; - defer { + defer { const elapsed = x_timer.read(); std.debug.print("pull_request filterCrdsValues took {}ns\n", .{elapsed}); } @@ -1020,7 +1245,7 @@ pub const GossipService = struct { maybe_pull_log_entry: ?Entry, ) error{OutOfMemory}!void { // TODO: benchmark and compare with labs' preprocessing - const now = get_wallclock_ms(); + const now = getWallclockMs(); var crds_table_lock = self.crds_table_rw.write(); var crds_table: *CrdsTable = crds_table_lock.mut(); @@ -1093,7 +1318,7 @@ pub const GossipService = struct { /// the prune message to process prune_data: *const PruneData, ) error{ PruneMessageTooOld, BadDestination }!void { - const now = get_wallclock_ms(); + const now = getWallclockMs(); const prune_wallclock = prune_data.wallclock; const too_old = 
prune_wallclock < now -| CRDS_GOSSIP_PRUNE_MSG_TIMEOUT_MS; if (too_old) { @@ -1147,7 +1372,7 @@ pub const GossipService = struct { var prune_packets = try std.ArrayList(Packet).initCapacity(self.allocator, n_packets); errdefer prune_packets.deinit(); - const now = get_wallclock_ms(); + const now = getWallclockMs(); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; var index: usize = 0; @@ -1379,7 +1604,9 @@ pub fn crdsValuesToPackets( crds_values: []CrdsValue, to_endpoint: *const EndPoint, chunk_type: ChunkType, -) error{ OutOfMemory, SerializationError }!std.ArrayList(Packet) { +) error{ OutOfMemory, SerializationError }!?std.ArrayList(Packet) { + if (crds_values.len == 0) return null; + const indexs = try chunkValuesIntoPacketIndexs( allocator, crds_values, @@ -1477,7 +1704,7 @@ test "gossip.gossip_service: tests handle_prune_messages" { for (0..10) |_| { var rand_keypair = try KeyPair.create(null); var value = try CrdsValue.randomWithIndex(rng.random(), &rand_keypair, 0); // contact info - try lg.mut().insert(value, get_wallclock_ms()); + try lg.mut().insert(value, getWallclockMs()); try peers.append(value.data.LegacyContactInfo); } lg.unlock(); @@ -1501,7 +1728,7 @@ test "gossip.gossip_service: tests handle_prune_messages" { .destination = gossip_service.my_pubkey, .prunes = &prunes, .signature = undefined, - .wallclock = get_wallclock_ms(), + .wallclock = getWallclockMs(), }; try prune_data.sign(&my_keypair); @@ -1603,7 +1830,7 @@ test "gossip.gossip_service: tests handle_pull_request" { for (0..5) |_| { var value = try CrdsValue.randomWithIndex(rng.random(), &my_keypair, 0); value.data.LegacyContactInfo.id = Pubkey.random(rng.random(), .{}); - try crds_table.insert(value, get_wallclock_ms()); + try crds_table.insert(value, getWallclockMs()); // make sure well get a response from the request const vers_value = crds_table.get(value.label()).?; @@ -1626,7 +1853,7 @@ test "gossip.gossip_service: tests handle_pull_request" { var ci_data = 
crds.CrdsData.randomFromIndex(rng.random(), 0); ci_data.LegacyContactInfo.id = my_pubkey; - const crds_value = try CrdsValue.initSigned(ci_data, &my_keypair); + var crds_value = try CrdsValue.initSigned(ci_data, &my_keypair); const addr = SocketAddr.random(rng.random()); var ping_lock = gossip_service.ping_cache_rw.write(); @@ -1634,7 +1861,7 @@ test "gossip.gossip_service: tests handle_pull_request" { ping_cache._setPong(my_pubkey, addr); ping_lock.unlock(); - const filter = CrdsFilter{ + var filter = CrdsFilter{ .filter = bloom, .mask = (~@as(usize, 0)) >> N_FILTER_BITS, .mask_bits = N_FILTER_BITS, @@ -1647,8 +1874,19 @@ test "gossip.gossip_service: tests handle_pull_request" { null, ); defer packets.?.deinit(); - try std.testing.expect(packets.?.items.len > 0); + + var batch_requests = std.ArrayList(GossipService.PullRequestMessage).init(allocator); + defer batch_requests.deinit(); + + var from_endpoint = addr.toEndpoint(); + try batch_requests.append(GossipService.PullRequestMessage{ + .value = crds_value, + .filter = filter, + .from_endpoint = from_endpoint, + }); + + gossip_service.handleBatchPullRequest(batch_requests); } test "gossip.gossip_service: test build prune messages and handle_push_msgs" { @@ -1695,7 +1933,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { .LegacyContactInfo = send_contact_info, }, &my_keypair); var lg = gossip_service.crds_table_rw.write(); - try lg.mut().insert(ci_value, get_wallclock_ms()); + try lg.mut().insert(ci_value, getWallclockMs()); lg.unlock(); var forigins = try gossip_service.handlePushMessage(values.items); @@ -1754,7 +1992,7 @@ test "gossip.gossip_service: test build_pull_requests" { var lg = gossip_service.crds_table_rw.write(); for (0..20) |_| { var value = try CrdsValue.randomWithIndex(rng.random(), &keypair, 0); - try lg.mut().insert(value, get_wallclock_ms()); + try lg.mut().insert(value, getWallclockMs()); var pc: *PingCache = ping_lock.mut(); 
pc._setPong(value.data.LegacyContactInfo.id, value.data.LegacyContactInfo.gossip); } @@ -1799,7 +2037,7 @@ test "gossip.gossip_service: test build_push_messages" { for (0..10) |_| { var keypair = try KeyPair.create(null); var value = try CrdsValue.randomWithIndex(rng.random(), &keypair, 0); // contact info - try lg.mut().insert(value, get_wallclock_ms()); + try lg.mut().insert(value, getWallclockMs()); try peers.append(value.data.LegacyContactInfo); } lg.unlock(); @@ -1823,7 +2061,7 @@ test "gossip.gossip_service: test build_push_messages" { try push_queue.append(value); pqlg.unlock(); } - gossip_service.drainPushQueueToCrdsTable(get_wallclock_ms()); + gossip_service.drainPushQueueToCrdsTable(getWallclockMs()); var clg = gossip_service.crds_table_rw.read(); try std.testing.expect(clg.get().len() == 11); @@ -1869,7 +2107,7 @@ test "gossip.gossip_service: test packet verification" { var packet_verifier_handle = try Thread.spawn(.{}, GossipService.verifyPackets, .{&gossip_service}); - var rng = std.rand.DefaultPrng.init(get_wallclock_ms()); + var rng = std.rand.DefaultPrng.init(getWallclockMs()); var data = crds.CrdsData.randomFromIndex(rng.random(), 0); data.LegacyContactInfo.id = id; data.LegacyContactInfo.wallclock = 0; @@ -2058,7 +2296,7 @@ test "gossip.gossip_service: process contact_info push packet" { test "gossip.gossip_service: init, exit, and deinit" { var gossip_address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var my_keypair = try KeyPair.create(null); - var rng = std.rand.DefaultPrng.init(get_wallclock_ms()); + var rng = std.rand.DefaultPrng.init(getWallclockMs()); var contact_info = crds.LegacyContactInfo.random(rng.random()); contact_info.gossip = gossip_address; var exit = AtomicBool.init(false); @@ -2088,10 +2326,26 @@ test "gossip.gossip_service: init, exit, and deinit" { } const fuzz = @import("./fuzz.zig"); -pub const BenchmarkMessageProcessing = struct { + +const Sender = struct { + const Self = @This(); + + gs: *GossipService, + to_endpoint: 
EndPoint, + + pub fn send(self: *Self, msg: Protocol) void { + self.gs.verified_incoming_channel.send(ProtocolMessage{ + .message = msg, + .from_endpoint = self.to_endpoint, + }) catch @panic("ahhhh"); + } +}; + +pub const BenchmarkGossipServiceGeneral = struct { pub const min_iterations = 1; pub const max_iterations = 5; + // TODO: bigger values ltr pub const args = [_]usize{ 10, 100, @@ -2101,20 +2355,6 @@ pub const BenchmarkMessageProcessing = struct { "10_msg_iters", "100_msg_iters", }; - const Sender = struct { - const Self = @This(); - - gs: *GossipService, - to_endpoint: EndPoint, - - pub fn send(self: *Self, msg: Protocol) void { - self.gs.verified_incoming_channel.send(ProtocolMessage{ - .message = msg, - .from_endpoint = self.to_endpoint, - }) catch @panic("ahhhh"); - } - }; - pub fn benchmarkGossipService(num_message_iterations: usize) !void { const allocator = std.heap.page_allocator; var keypair = try KeyPair.create(null); @@ -2208,6 +2448,19 @@ pub const BenchmarkMessageProcessing = struct { exit.store(true, std.atomic.Ordering.Unordered); packet_handle.join(); } +}; + +pub const BenchmarkGossipServicePullRequest = struct { + pub const min_iterations = 1; + pub const max_iterations = 1; + + pub const args = [_]usize{ + 1_000, + }; + + pub const arg_names = [_][]const u8{ + "1_000", + }; pub fn benchmarkPullRequests(num_message_iterations: usize) !void { const allocator = std.heap.page_allocator; @@ -2235,7 +2488,11 @@ pub const BenchmarkMessageProcessing = struct { ); defer gossip_service.deinit(); - var packet_handle = try Thread.spawn(.{}, GossipService.processMessages, .{ + // var packet_handle = try Thread.spawn(.{}, GossipService.processMessages, .{ + // &gossip_service, + // }); + + var packet_handle = try Thread.spawn(.{}, GossipService.runSpy, .{ &gossip_service, }); @@ -2244,29 +2501,21 @@ pub const BenchmarkMessageProcessing = struct { var sender_keypair = try KeyPair.create(null); - var sender = Sender{ - .gs = &gossip_service, - 
.to_endpoint = address.toEndpoint(), - }; - var msg_sent: usize = 0; - for (0..num_message_iterations) |_| { + for (0..num_message_iterations) |i| { // send a push message - { + if (i % 2 == 0) { var packets = try fuzz.randomPushMessage(rng, &sender_keypair, address.toEndpoint()); defer packets.deinit(); for (packets.items) |packet| { - var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); - sender.send(msg); + try gossip_service.packet_incoming_channel.send(packet); msg_sent += 1; } - } - // send a pull request - { + } else { + // send a pull request var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); - var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); - sender.send(msg); + try gossip_service.packet_incoming_channel.send(packet); msg_sent += 1; } } @@ -2276,6 +2525,7 @@ pub const BenchmarkMessageProcessing = struct { if (v == msg_sent) { break; } + std.time.sleep(std.time.ns_per_s); } exit.store(true, std.atomic.Ordering.Unordered); diff --git a/src/gossip/pull_response.zig b/src/gossip/pull_response.zig index 6e35aef76..0fb078261 100644 --- a/src/gossip/pull_response.zig +++ b/src/gossip/pull_response.zig @@ -17,7 +17,6 @@ const CrdsFilter = crds_pull_req.CrdsFilter; pub const CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS: u64 = 15000; -// TODO: make it batch pub fn filterCrdsValues( alloc: std.mem.Allocator, crds_table: *const CrdsTable, @@ -41,7 +40,8 @@ pub fn filterCrdsValues( var match_indexs = try crds_table.getBitmaskMatches(alloc, filter.mask, filter.mask_bits); defer match_indexs.deinit(); - var output = try ArrayList(CrdsValue).initCapacity(alloc, match_indexs.items.len); + const output_size = @min(max_number_values, match_indexs.items.len); + var output = try ArrayList(CrdsValue).initCapacity(alloc, output_size); errdefer output.deinit(); for (match_indexs.items) |entry_index| { @@ -70,182 +70,6 @@ pub fn 
filterCrdsValues( return output; } -test "gossip.pull: test filter_crds_values batch" { - const N_FILTERS = 100; - const N_VALUES_IN_TABLE = 10_000; - - var crds_table = try CrdsTable.init(std.testing.allocator); - var crds_table_rw = RwMux(CrdsTable).init(crds_table); - defer { - var lg = crds_table_rw.write(); - lg.mut().deinit(); - } - var seed: u64 = 18; - var rand = std.rand.DefaultPrng.init(seed); - const rng = rand.random(); - - // insert a some values - const keypair = try KeyPair.create([_]u8{1} ** 32); - var lg = crds_table_rw.write(); - for (0..N_VALUES_IN_TABLE) |_| { - var crds_value = try crds.CrdsValue.random(rng, &keypair); - try lg.mut().insert(crds_value, 0); - } - lg.unlock(); - - const fuzz = @import("fuzz.zig"); - const SocketAddr = @import("../net/net.zig").SocketAddr; - const bincode = @import("../bincode/bincode.zig"); - const Protocol = @import("protocol.zig").Protocol; - - // create a pull request - // const allocator = std.testing.allocator; - const allocator = std.heap.c_allocator; - const to_addr = SocketAddr.random(rng).toEndpoint(); - - var filters = try std.ArrayList(CrdsFilter).initCapacity(allocator, N_FILTERS); - defer { - for (filters.items) |*filter| { - filter.deinit(); - } - filters.deinit(); - } - for (0..N_FILTERS) |_| { - const packet = try fuzz.randomPullRequest(allocator, rng, &keypair, to_addr); - var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); - var filter: CrdsFilter = msg.PullRequest[0]; - filters.appendAssumeCapacity(filter); - } - - // process them sequentially - var resp_values = std.ArrayList(CrdsValue).init(allocator); - defer resp_values.deinit(); - var read_lg = crds_table_rw.read(); - var crds_table_read: *const CrdsTable = read_lg.get(); - - var seq_timer = try std.time.Timer.start(); - for (filters.items) |*filter| { - const resp = try filterCrdsValues( - allocator, - crds_table_read, - filter, - crds.getWallclockMs(), - 100 - ); - defer resp.deinit(); 
- - try resp_values.appendSlice(resp.items); - } - read_lg.unlock(); - std.debug.assert(resp_values.items.len > 0); - const seq_elapsed = seq_timer.read(); - std.debug.print("SEQ: elapsed = {}\n", .{seq_elapsed}); - - // process them in parallel - const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; - const Task = ThreadPool.Task; - - var pool = ThreadPool.init(.{ - .max_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), - .stack_size = 2 * 1024 * 1024, - }); - - const PullRequestContext = struct { - filter: *const CrdsFilter, - crds_table: *const CrdsTable, - output: ArrayList(CrdsValue), - done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), - }; - - const PullRequestTask = struct { - task: Task, - context: *PullRequestContext, - allocator: std.mem.Allocator, - - pub fn callback(task: *Task) void { - var self = @fieldParentPtr(@This(), "task", task); - const response_crds_values = filterCrdsValues( - self.allocator, - self.context.crds_table, - self.context.filter, - crds.getWallclockMs(), - 100, - ) catch { - // std.debug.print("filterCrdsValues failed\n", .{}); - return; - }; - self.context.output.appendSlice(response_crds_values.items) catch { - // std.debug.print("append slice failed\n", .{}); - return; - }; - // std.debug.print("success: len = {}\n", .{ response_crds_values.items.len }); - self.context.done.store(true, std.atomic.Ordering.Release); - } - }; - - // read lock crds table - read_lg = crds_table_rw.read(); - crds_table_read = read_lg.get(); - var batch: ThreadPool.Batch = undefined; - var parallel_timer = try std.time.Timer.start(); - - var tasks = try std.ArrayList(*PullRequestTask).initCapacity(allocator, filters.items.len); - for (filters.items, 0..) 
|*filter_i, i| { - var output = ArrayList(CrdsValue).init(allocator); - var context = PullRequestContext { - .filter = filter_i, - .crds_table = crds_table_read, - .output = output, - }; - var context_heap = try allocator.create(PullRequestContext); - context_heap.* = context; - - var pull_task = PullRequestTask { - .task = .{ .callback = PullRequestTask.callback }, - .context = context_heap, - .allocator = allocator, - }; - - // alloc on heap - var pull_task_heap = try allocator.create(PullRequestTask); - pull_task_heap.* = pull_task; - tasks.appendAssumeCapacity(pull_task_heap); - - if (i == 0) { - batch = ThreadPool.Batch.from(&pull_task_heap.task); - } else { - var tmp_batch = ThreadPool.Batch.from(&pull_task_heap.task); - batch.push(tmp_batch); - } - } - // schedule the threadpool - ThreadPool.schedule(&pool, batch); - - for (tasks.items) |task| { - while (!task.context.done.load(std.atomic.Ordering.Acquire)) { - // wait - } - } - // unlock crds table - read_lg.unlock(); - const parallel_elapsed = parallel_timer.read(); - std.debug.print("PARALLEL: elapsed: {}\n", .{parallel_elapsed}); - - var total_len: usize = 0; - for (tasks.items) |task| { - total_len += task.context.output.items.len; - } - try std.testing.expect(total_len == resp_values.items.len); - - const time_diff: i128 = @as(i128, @intCast(parallel_elapsed)) - @as(i128, @intCast(seq_elapsed)); - std.debug.print("TIME DIFF: {}(ns)\n", .{time_diff}); - if (time_diff > 0) { - std.debug.print("sequential fast\n", .{}); - } else { - std.debug.print("parallel fast\n", .{}); - } -} - test "gossip.pull: test filter_crds_values" { var crds_table = try CrdsTable.init(std.testing.allocator); var crds_table_rw = RwMux(CrdsTable).init(crds_table); diff --git a/src/lib.zig b/src/lib.zig index acb52257b..e396e1d7c 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -45,6 +45,7 @@ pub const sync = struct { pub usingnamespace @import("sync/mpmc.zig"); pub usingnamespace @import("sync/ref.zig"); pub usingnamespace 
@import("sync/mux.zig"); + pub usingnamespace @import("sync/thread_pool.zig"); }; pub const utils = struct { diff --git a/src/net/echo.zig b/src/net/echo.zig index 4ff89dddd..2b48ddf64 100644 --- a/src/net/echo.zig +++ b/src/net/echo.zig @@ -57,6 +57,7 @@ pub const Server = struct { port: u16, conns: *Channel(*Response), conns_in_flight: Atomic(usize), + exit: *const Atomic(bool), const Self = @This(); const Response = http.Server.Response; @@ -66,6 +67,7 @@ pub const Server = struct { allocator: std.mem.Allocator, port: u16, logger: Logger, + exit: *const Atomic(bool), ) Self { return Self{ .allocator = allocator, @@ -74,6 +76,7 @@ pub const Server = struct { .logger = logger, .conns = Channel(*Response).init(allocator, 1024), .conns_in_flight = Atomic(usize).init(0), + .exit = exit, }; } @@ -132,7 +135,8 @@ pub const Server = struct { self: *Self, ) !void { self.logger.debug("accepting new connections"); - while (!self.conns.isClosed()) { + while (!self.conns.isClosed() and !self.exit.load(std.atomic.Ordering.Unordered)) { + // TODO: change to non-blocking socket var response = self.server.accept(.{ .allocator = self.allocator, .header_strategy = .{ .dynamic = MAX_REQ_HEADER_SIZE }, @@ -305,7 +309,9 @@ test "net.echo: Server works" { defer logger.deinit(); logger.spawn(); - var server = Server.init(testing.allocator, port, logger); + var exit = Atomic(bool).init(false); + + var server = Server.init(testing.allocator, port, logger, &exit); defer server.deinit(); var server_thread_handle = try std.Thread.spawn(.{}, Server.listenAndServe, .{&server}); if (builtin.os.tag == .linux) try server_thread_handle.setName("server_thread"); diff --git a/src/net/net.zig b/src/net/net.zig index c57073d6b..fa744e91d 100644 --- a/src/net/net.zig +++ b/src/net/net.zig @@ -174,7 +174,7 @@ pub const SocketAddr = union(enum(u8)) { } } - pub fn fromEndpoint(endpoint: network.EndPoint) Self { + pub fn fromEndpoint(endpoint: *const network.EndPoint) Self { switch (endpoint.address) { 
.ipv4 => |v4| { return Self{ diff --git a/src/sync/thread_pool.zig b/src/sync/thread_pool.zig index 51fe7e8be..7aa1c3746 100644 --- a/src/sync/thread_pool.zig +++ b/src/sync/thread_pool.zig @@ -1182,135 +1182,138 @@ pub const ThreadPool = struct { }; }; -test "parallel for loop" { - var thread_pool = ThreadPool.init(.{ .max_threads = 12 }); - var sleepy_time: u32 = 100; - var huge_array = &[_]u32{ - sleepy_time + std.rand.DefaultPrng.init(1).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(2).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(3).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(4).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(5).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(6).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(7).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(8).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(9).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(10).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(11).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(12).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(13).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(14).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(15).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(16).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(17).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(18).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(19).random().uintAtMost(u32, 20), - sleepy_time + std.rand.DefaultPrng.init(20).random().uintAtMost(u32, 20), - }; - const Runner = struct { - completed: usize = 0, - total: usize = 0, - pub fn 
run(ctx: *@This(), value: u32, _: usize) void { - std.time.sleep(value); - ctx.completed += 1; - std.debug.assert(ctx.completed <= ctx.total); - } - }; - var runny = try std.heap.page_allocator.create(Runner); - runny.* = .{ .total = huge_array.len }; - try thread_pool.doAndWait(std.heap.page_allocator, null, runny, Runner.run, std.mem.span(huge_array)); - try std.testing.expectEqual(huge_array.len, runny.completed); -} - -pub fn NewWorkPool(comptime max_threads: ?usize) type { - return struct { - var pool: ThreadPool = undefined; - var loaded: bool = false; - - fn create() *ThreadPool { - @setCold(true); - - pool = ThreadPool.init(.{ - .max_threads = max_threads orelse @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), - .stack_size = 2 * 1024 * 1024, - }); - return &pool; - } - - pub fn deinit() void { - get().deinit(); - } - - pub inline fn get() *ThreadPool { - // lil racy - if (loaded) return &pool; - loaded = true; - - return create(); - } - - pub fn scheduleBatch(batch: ThreadPool.Batch) void { - get().schedule(batch); - } - - pub fn scheduleTask(task: *ThreadPool.Task) void { - get().schedule(ThreadPool.Batch.from(task)); - } - - pub fn go(allocator: std.mem.Allocator, comptime Context: type, context: Context, comptime function: *const fn (Context) void) !void { - const TaskType = struct { - task: ThreadPool.Task, - context: Context, - allocator: std.mem.Allocator, - - pub fn callback(task: *ThreadPool.Task) void { - var this_task = @fieldParentPtr(@This(), "task", task); - function(this_task.context); - this_task.allocator.destroy(this_task); - } - }; - - var task_ = try allocator.create(TaskType); - task_.* = .{ - .task = .{ .callback = TaskType.callback }, - .context = context, - .allocator = allocator, - }; - scheduleTask(&task_.task); - } - }; -} - -pub const WorkPool = NewWorkPool(null); -const testing = std.testing; - -const CrdsTableTrimContext = struct { - index: usize, - max_trim: usize, - self: *CrdsTable, -}; - -const CrdsTable = 
struct { - pub fn trim(context: CrdsTableTrimContext) void { - const self = context.self; - _ = self; - const max_trim = context.max_trim; - _ = max_trim; - const index = context.index; - _ = index; - - std.debug.print("I ran!\n\n", .{}); - // todo - - } -}; - -test "sync.thread_pool: workpool works" { - var crds: CrdsTable = CrdsTable{}; - var a = CrdsTableTrimContext{ .index = 1, .max_trim = 2, .self = &crds }; - defer WorkPool.deinit(); - try WorkPool.go(testing.allocator, CrdsTableTrimContext, a, CrdsTable.trim); - - std.time.sleep(std.time.ns_per_s * 1); - WorkPool.pool.shutdown(); -} \ No newline at end of file +// test "parallel for loop" { +// var thread_pool = ThreadPool.init(.{ .max_threads = 12 }); +// var sleepy_time: u32 = 100; +// var random = std.rand.DefaultPrng.init(1); +// var rng = random.random(); + +// var huge_array = &[_]u32{ +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// sleepy_time + rng.uintAtMost(u32, 20), +// }; +// const Runner = struct { +// completed: usize = 0, +// total: usize = 0, +// pub fn run(ctx: *@This(), value: u32, _: usize) void { +// std.time.sleep(value); +// ctx.completed += 1; +// std.debug.assert(ctx.completed <= ctx.total); +// } +// }; +// var runny = try 
std.heap.page_allocator.create(Runner); +// runny.* = .{ .total = huge_array.len }; +// try thread_pool.doAndWait(std.heap.page_allocator, null, runny, Runner.run, std.mem.span(huge_array)); +// try std.testing.expectEqual(huge_array.len, runny.completed); +// } + +// pub fn NewWorkPool(comptime max_threads: ?usize) type { +// return struct { +// var pool: ThreadPool = undefined; +// var loaded: bool = false; + +// fn create() *ThreadPool { +// @setCold(true); + +// pool = ThreadPool.init(.{ +// .max_threads = max_threads orelse @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), +// .stack_size = 2 * 1024 * 1024, +// }); +// return &pool; +// } + +// pub fn deinit() void { +// get().deinit(); +// } + +// pub inline fn get() *ThreadPool { +// // lil racy +// if (loaded) return &pool; +// loaded = true; + +// return create(); +// } + +// pub fn scheduleBatch(batch: ThreadPool.Batch) void { +// get().schedule(batch); +// } + +// pub fn scheduleTask(task: *ThreadPool.Task) void { +// get().schedule(ThreadPool.Batch.from(task)); +// } + +// pub fn go(allocator: std.mem.Allocator, comptime Context: type, context: Context, comptime function: *const fn (Context) void) !void { +// const TaskType = struct { +// task: ThreadPool.Task, +// context: Context, +// allocator: std.mem.Allocator, + +// pub fn callback(task: *ThreadPool.Task) void { +// var this_task = @fieldParentPtr(@This(), "task", task); +// function(this_task.context); +// this_task.allocator.destroy(this_task); +// } +// }; + +// var task_ = try allocator.create(TaskType); +// task_.* = .{ +// .task = .{ .callback = TaskType.callback }, +// .context = context, +// .allocator = allocator, +// }; +// scheduleTask(&task_.task); +// } +// }; +// } + +// pub const WorkPool = NewWorkPool(null); +// const testing = std.testing; + +// const CrdsTableTrimContext = struct { +// index: usize, +// max_trim: usize, +// self: *CrdsTable, +// }; + +// const CrdsTable = struct { +// pub fn trim(context: 
CrdsTableTrimContext) void { +// const self = context.self; +// _ = self; +// const max_trim = context.max_trim; +// _ = max_trim; +// const index = context.index; +// _ = index; + +// std.debug.print("I ran!\n\n", .{}); +// // todo + +// } +// }; + +// test "sync.thread_pool: workpool works" { +// var crds: CrdsTable = CrdsTable{}; +// var a = CrdsTableTrimContext{ .index = 1, .max_trim = 2, .self = &crds }; +// defer WorkPool.deinit(); +// try WorkPool.go(testing.allocator, CrdsTableTrimContext, a, CrdsTable.trim); + +// std.time.sleep(std.time.ns_per_s * 1); +// WorkPool.pool.shutdown(); +// } From c8253b31841b4cb9afcaab1d3ed5fb57e2c12c4f Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Thu, 21 Sep 2023 13:27:08 -0400 Subject: [PATCH 08/72] parallel processing for crds table trim --- src/gossip/active_set.zig | 5 +- src/gossip/crds_shards.zig | 4 +- src/gossip/crds_table.zig | 126 +++++++++++++++++++++++++++------- src/gossip/gossip_service.zig | 116 ++++++++++++------------------- src/gossip/pull_request.zig | 4 +- src/gossip/pull_response.zig | 4 +- src/net/echo.zig | 2 +- src/sync/thread_pool.zig | 2 +- 8 files changed, 158 insertions(+), 105 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index fcda5fa37..9f760d0e0 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -132,10 +132,13 @@ pub const ActiveSet = struct { } }; +const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; + test "gossip.active_set: init/deinit" { var alloc = std.testing.allocator; - var crds_table = try CrdsTable.init(alloc); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(alloc, &tp); defer crds_table.deinit(); // insert some contacts diff --git a/src/gossip/crds_shards.zig b/src/gossip/crds_shards.zig index b7aa6e649..4ca87ff71 100644 --- a/src/gossip/crds_shards.zig +++ b/src/gossip/crds_shards.zig @@ -157,7 +157,9 @@ fn filter_crds_values( } test "gossip.crds_shards: test shard find" 
{ - var crds_table = try CrdsTable.init(std.testing.allocator); + const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); // gen ranndom values diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 5562ca5fc..298aaee0d 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -17,6 +17,10 @@ const CrdsVersionedValue = crds.CrdsVersionedValue; const CrdsValueLabel = crds.CrdsValueLabel; const LegacyContactInfo = crds.LegacyContactInfo; +const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; +const Task = ThreadPool.Task; +const Batch = ThreadPool.Batch; + const Transaction = @import("../core/transaction.zig").Transaction; const Pubkey = @import("../core/pubkey.zig").Pubkey; const KeyPair = std.crypto.sign.Ed25519.KeyPair; @@ -95,10 +99,11 @@ pub const CrdsTable = struct { cursor: usize = 0, allocator: std.mem.Allocator, + thread_pool: *ThreadPool, const Self = @This(); - pub fn init(allocator: std.mem.Allocator) !Self { + pub fn init(allocator: std.mem.Allocator, thread_pool: *ThreadPool) !Self { return Self{ .store = AutoArrayHashMap(CrdsValueLabel, CrdsVersionedValue).init(allocator), .contact_infos = AutoArrayHashSet(usize).init(allocator), @@ -111,6 +116,7 @@ pub const CrdsTable = struct { .shards = try CrdsShards.init(allocator), .purged = HashTimeQueue.init(allocator), .allocator = allocator, + .thread_pool = thread_pool, }; } @@ -565,25 +571,35 @@ pub const CrdsTable = struct { } } - pub fn getOldLabels( - self: *Self, - now: u64, - timeout: u64, - ) error{OutOfMemory}!std.ArrayList(CrdsValueLabel) { - var old_labels = std.ArrayList(CrdsValueLabel).init(self.allocator); + const GetOldLabelsTask = struct { + // context + key: Pubkey, + crds_table: *const CrdsTable, + cutoff_timestamp: u64, + old_labels: std.ArrayList(CrdsValueLabel), - const cutoff_timestamp = now -| timeout; - 
const n_pubkeys = self.pubkey_to_values.count(); - for (self.pubkey_to_values.keys()[0..n_pubkeys]) |key| { - const entry = self.pubkey_to_values.getEntry(key).?; + // standard + task: Task = .{ .callback = callback }, + done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + + pub fn deinit(self: *GetOldLabelsTask) void { + self.old_labels.deinit(); + } + + pub fn callback(task: *Task) void { + var this = @fieldParentPtr(@This(), "task", task); + defer this.done.store(true, std.atomic.Ordering.Release); + + // get assocaited entries + const entry = this.crds_table.pubkey_to_values.getEntry(this.key).?; // if contact info is up to date then we dont need to check the values const pubkey = entry.key_ptr; const label = CrdsValueLabel{ .LegacyContactInfo = pubkey.* }; - if (self.get(label)) |*contact_info| { + if (this.crds_table.get(label)) |*contact_info| { const value_timestamp = @min(contact_info.value.wallclock(), contact_info.timestamp_on_insertion); - if (value_timestamp > cutoff_timestamp) { - continue; + if (value_timestamp > this.cutoff_timestamp) { + return; } } @@ -592,15 +608,68 @@ pub const CrdsTable = struct { const count = entry_indexs.count(); for (entry_indexs.iterator().keys[0..count]) |entry_index| { - const versioned_value = self.store.values()[entry_index]; + const versioned_value = this.crds_table.store.values()[entry_index]; const value_timestamp = @min(versioned_value.value.wallclock(), versioned_value.timestamp_on_insertion); - if (value_timestamp <= cutoff_timestamp) { - try old_labels.append(versioned_value.value.label()); + if (value_timestamp <= this.cutoff_timestamp) { + this.old_labels.append(versioned_value.value.label()) catch unreachable; } } } + }; + + pub fn getOldLabels( + self: *Self, + now: u64, + timeout: u64, + ) error{OutOfMemory}!std.ArrayList(CrdsValueLabel) { + const cutoff_timestamp = now -| timeout; + const n_pubkeys = self.pubkey_to_values.count(); + + var tasks = try 
std.ArrayList(*GetOldLabelsTask).initCapacity(self.allocator, n_pubkeys); + defer { + for (tasks.items) |task| { + task.deinit(); + self.allocator.destroy(task); + } + tasks.deinit(); + } + + // run this loop in parallel + for (self.pubkey_to_values.keys()[0..n_pubkeys]) |key| { + var old_labels = std.ArrayList(CrdsValueLabel).init(self.allocator); + var task = GetOldLabelsTask{ + .key = key, + .crds_table = self, + .cutoff_timestamp = cutoff_timestamp, + .old_labels = old_labels, + }; + + // alloc on heap + var task_heap = try self.allocator.create(GetOldLabelsTask); + task_heap.* = task; + tasks.appendAssumeCapacity(task_heap); + + // run it + const batch = Batch.from(&task_heap.task); + ThreadPool.schedule(self.thread_pool, batch); + } + + // wait for them to be done to release the lock + var output_length: u64 = 0; + for (tasks.items) |task| { + while (!task.done.load(std.atomic.Ordering.Acquire)) { + // wait + } + output_length += task.old_labels.items.len; + } + + // move labels to one big array + var output = try std.ArrayList(CrdsValueLabel).initCapacity(self.allocator, output_length); + for (tasks.items) |task| { + output.appendSliceAssumeCapacity(task.old_labels.items); + } - return old_labels; + return output; } }; @@ -687,7 +756,8 @@ test "gossip.crds_table: remove old values" { var seed: u64 = @intCast(std.time.milliTimestamp()); var rng = std.rand.DefaultPrng.init(seed); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); for (0..5) |_| { @@ -714,7 +784,8 @@ test "gossip.crds_table: insert and remove value" { var seed: u64 = @intCast(std.time.milliTimestamp()); var rng = std.rand.DefaultPrng.init(seed); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); const value = try 
CrdsValue.initSigned(CrdsData.randomFromIndex(rng.random(), 0), &keypair); @@ -730,7 +801,8 @@ test "gossip.crds_table: trim pruned values" { var seed: u64 = @intCast(std.time.milliTimestamp()); var rng = std.rand.DefaultPrng.init(seed); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); const N_VALUES = 10; @@ -793,7 +865,8 @@ test "gossip.HashTimeQueue: trim pruned values" { }; var value = try CrdsValue.initSigned(data, &keypair); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); // timestamp = 100 @@ -825,7 +898,8 @@ test "gossip.crds_table: insert and get" { const rng = rand.random(); var value = try CrdsValue.random(rng, &keypair); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); try crds_table.insert(value, 0); @@ -846,7 +920,8 @@ test "gossip.crds_table: insert and get votes" { .Vote = .{ 0, vote }, }, &kp); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); try crds_table.insert(crds_value, 0); @@ -885,7 +960,8 @@ test "gossip.crds_table: insert and get contact_info" { .LegacyContactInfo = legacy_contact_info, }, &kp); - var crds_table = try CrdsTable.init(std.testing.allocator); + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); // test insertion diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index af839af7b..72111a27d 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -138,7 
+138,13 @@ pub const GossipService = struct { verified_incoming_channel.deinit(); } - var crds_table = try CrdsTable.init(allocator); + var thread_pool = try allocator.create(ThreadPool); + thread_pool.* = ThreadPool.init(.{ + .max_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), + .stack_size = 2 * 1024 * 1024, + }); + + var crds_table = try CrdsTable.init(allocator, thread_pool); errdefer crds_table.deinit(); var crds_table_rw = RwMux(CrdsTable).init(crds_table); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, false); @@ -155,12 +161,6 @@ pub const GossipService = struct { var push_msg_q = std.ArrayList(CrdsValue).init(allocator); var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); - var thread_pool = try allocator.create(ThreadPool); - thread_pool.* = ThreadPool.init(.{ - .max_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), - .stack_size = 2 * 1024 * 1024, - }); - return Self{ .my_contact_info = my_contact_info, .my_keypair = my_keypair, @@ -443,13 +443,6 @@ pub const GossipService = struct { // pull_log_entry.info("received pull response"); }, .PullRequest => |*pull| { - // var x_timer = std.time.Timer.start() catch unreachable; - // defer { - // const elapsed = x_timer.read(); - // std.debug.print("pull_request took {}ns\n", .{elapsed}); - // } - - // var pull_filter: CrdsFilter = pull[0]; var pull_value: CrdsValue = pull[1]; // contact info switch (pull_value.data) { .LegacyContactInfo => |*info| { @@ -467,41 +460,6 @@ pub const GossipService = struct { .value = pull[1], .from_endpoint = from_endpoint, }); - - // var endpoint_buf = std.ArrayList(u8).init(self.allocator); - // try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); - // defer endpoint_buf.deinit(); - - // var pull_log_entry = self.logger - // .field("from_endpoint", endpoint_buf.items) - // .field("from_pubkey", &pull_value.id().string()); - - // var maybe_packets 
= self.handlePullRequest( - // pull_value, - // pull_filter, - // from_endpoint, - // pull_log_entry, - // ) catch |err| { - // pull_log_entry.field("error", @errorName(err)) - // .err("error handling pull request"); - // continue; - // }; - - // if (maybe_packets == null) { - // pull_log_entry.field("num_packets_resp", 0) - // .info("received pull request"); - // continue; - // } - - // var packets = maybe_packets.?; - // defer packets.deinit(); - - // pull_log_entry.field("num_packets_resp", packets.items.len) - // .info("received pull request"); - - // for (packets.items) |packet| { - // try self.packet_outgoing_channel.send(packet); - // } }, .PruneMessage => |*prune| { const prune_msg: PruneData = prune[1]; @@ -570,18 +528,28 @@ pub const GossipService = struct { } // handle batch messages - self.handleBatchPullRequest(pull_requests); - for (pull_requests.items) |*pr| { - pr.filter.deinit(); + if (pull_requests.items.len > 0) { + // var pull_req_timer = std.time.Timer.start() catch unreachable; + // defer { + // std.debug.print("filter_crds_values elapsed {any} for {any} filters\n", .{ + // pull_req_timer.read(), + // pull_requests.items.len + // }); + // } + + self.handleBatchPullRequest(pull_requests); + for (pull_requests.items) |*pr| { + pr.filter.deinit(); + } + pull_requests.clearRetainingCapacity(); } - pull_requests.clearRetainingCapacity(); { - // var table_timer = std.time.Timer.start() catch unreachable; - // defer { - // const elapsed = table_timer.read(); - // std.debug.print("crds table trim took {}ns\n", .{elapsed}); - // } + var table_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = table_timer.read(); + std.debug.print("crds table trim took {}ns\n", .{elapsed}); + } var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); @@ -956,13 +924,18 @@ pub const GossipService = struct { } const PullRequestTask = struct { - task: Task, allocator: std.mem.Allocator, filter: CrdsFilter, crds_table: 
*const CrdsTable, output: std.ArrayList(CrdsValue), + + task: Task, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + pub fn deinit(this: *PullRequestTask) void { + this.output.deinit(); + } + pub fn callback(task: *Task) void { var this = @fieldParentPtr(@This(), "task", task); defer this.done.store(true, std.atomic.Ordering.Release); @@ -1047,10 +1020,15 @@ pub const GossipService = struct { } // create the pull requests - const n_valid_requests = valid_indexs.items.len; var tasks = try std.ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); - defer tasks.deinit(); + defer { + for (tasks.items) |task| { + task.deinit(); + self.allocator.destroy(task); + } + tasks.deinit(); + } { var crds_table_lock = self.crds_table_rw.read(); @@ -1078,11 +1056,6 @@ pub const GossipService = struct { ThreadPool.schedule(self.thread_pool, batch); } - // _ = pool; - // for (tasks.items) |task| { - // task.task.callback(&task.task); - // } - // wait for them to be done to release the lock for (tasks.items) |task| { while (!task.done.load(std.atomic.Ordering.Acquire)) { @@ -1093,11 +1066,6 @@ pub const GossipService = struct { for (tasks.items, valid_indexs.items) |task, message_i| { const from_endpoint = pull_requests.items[message_i].from_endpoint; - defer { - task.output.deinit(); - self.allocator.destroy(task); - } - const maybe_packets = try crdsValuesToPackets( self.allocator, &self.my_pubkey, @@ -1120,9 +1088,9 @@ pub const GossipService = struct { ) !void { for (pull_requests.items) |*pr| { const maybe_resp_packets = try self.handlePullRequest( - pr.value.*, - pr.filter.*, - pr.from_endpoint.*, + pr.value, + pr.filter, + pr.from_endpoint, null, ); if (maybe_resp_packets) |*resp_packets| { diff --git a/src/gossip/pull_request.zig b/src/gossip/pull_request.zig index a9917d66a..18f28e0bb 100644 --- a/src/gossip/pull_request.zig +++ b/src/gossip/pull_request.zig @@ -249,7 +249,9 @@ pub fn hashToU64(hash: *const Hash) u64 { } test 
"gossip.pull: test build_crds_filters" { - var crds_table = try CrdsTable.init(std.testing.allocator); + const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); defer crds_table.deinit(); // insert a some value diff --git a/src/gossip/pull_response.zig b/src/gossip/pull_response.zig index 0fb078261..e9ff12f0f 100644 --- a/src/gossip/pull_response.zig +++ b/src/gossip/pull_response.zig @@ -71,7 +71,9 @@ pub fn filterCrdsValues( } test "gossip.pull: test filter_crds_values" { - var crds_table = try CrdsTable.init(std.testing.allocator); + const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; + var tp = ThreadPool.init(.{}); + var crds_table = try CrdsTable.init(std.testing.allocator, &tp); var crds_table_rw = RwMux(CrdsTable).init(crds_table); defer { var lg = crds_table_rw.write(); diff --git a/src/net/echo.zig b/src/net/echo.zig index 2b48ddf64..5c21a5ce3 100644 --- a/src/net/echo.zig +++ b/src/net/echo.zig @@ -136,7 +136,7 @@ pub const Server = struct { ) !void { self.logger.debug("accepting new connections"); while (!self.conns.isClosed() and !self.exit.load(std.atomic.Ordering.Unordered)) { - // TODO: change to non-blocking socket + // TODO: change to non-blocking socket var response = self.server.accept(.{ .allocator = self.allocator, .header_strategy = .{ .dynamic = MAX_REQ_HEADER_SIZE }, diff --git a/src/sync/thread_pool.zig b/src/sync/thread_pool.zig index 7aa1c3746..341c63a31 100644 --- a/src/sync/thread_pool.zig +++ b/src/sync/thread_pool.zig @@ -54,7 +54,7 @@ pub const ThreadPool = struct { /// TODO: add CPU core affinity? pub const Config = struct { stack_size: u32 = (std.Thread.SpawnConfig{}).stack_size, - max_threads: u32, + max_threads: u32 = 1, }; /// Statically initialize the thread pool using the configuration. 
From 6a66c1d43dfc797485914a91738a3fbbcc81abed Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 26 Sep 2023 09:09:15 -0400 Subject: [PATCH 09/72] add per method timers --- src/gossip/gossip_service.zig | 67 ++++++++++++++++--------- src/gossip/socket_utils.zig | 92 ++++++++++++++++++++++------------- 2 files changed, 102 insertions(+), 57 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 72111a27d..6f3a73661 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -139,10 +139,12 @@ pub const GossipService = struct { } var thread_pool = try allocator.create(ThreadPool); + var n_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 8); thread_pool.* = ThreadPool.init(.{ - .max_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 2), + .max_threads = n_threads, .stack_size = 2 * 1024 * 1024, }); + std.debug.print("using n_threads in gossip: {}\n", .{n_threads}); var crds_table = try CrdsTable.init(allocator, thread_pool); errdefer crds_table.deinit(); @@ -373,16 +375,18 @@ pub const GossipService = struct { defer self.verified_incoming_channel.allocator.free(protocol_messages); msg_count += protocol_messages.len; + // TODO: filter messages based on_shred_version + for (protocol_messages) |*protocol_message| { var from_endpoint: EndPoint = protocol_message.from_endpoint; switch (protocol_message.message) { .PushMessage => |*push| { - // var x_timer = std.time.Timer.start() catch unreachable; - // defer { - // const elapsed = x_timer.read(); - // std.debug.print("push_message took {}ns\n", .{elapsed}); - // } + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("handle batch push took {} with {} items\n", .{ elapsed, 1 }); + } const push_from: Pubkey = push[0]; const push_values: []CrdsValue = push[1]; @@ -418,11 +422,11 @@ pub const GossipService = struct { 
push_log_entry.info("received push message"); }, .PullResponse => |*pull| { - // var x_timer = std.time.Timer.start() catch unreachable; - // defer { - // const elapsed = x_timer.read(); - // std.debug.print("pull_response took {}ns\n", .{elapsed}); - // } + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("handle batch pull_resp took {} with {} items\n", .{ elapsed, 1 }); + } const from: Pubkey = pull[0]; const crds_values: []CrdsValue = pull[1]; @@ -462,6 +466,11 @@ pub const GossipService = struct { }); }, .PruneMessage => |*prune| { + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("handle batch prune took {} with {} items\n", .{ elapsed, 1 }); + } const prune_msg: PruneData = prune[1]; var endpoint_buf = std.ArrayList(u8).init(self.allocator); @@ -484,6 +493,12 @@ pub const GossipService = struct { prune_log_entry.info("received prune message"); }, .PingMessage => |*ping| { + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("handle batch ping took {} with {} items\n", .{ elapsed, 1 }); + } + var endpoint_buf = std.ArrayList(u8).init(self.allocator); try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); defer endpoint_buf.deinit(); @@ -506,6 +521,12 @@ pub const GossipService = struct { .info("received ping message"); }, .PongMessage => |*pong| { + var x_timer = std.time.Timer.start() catch unreachable; + defer { + const elapsed = x_timer.read(); + std.debug.print("handle batch pong took {} with {} items\n", .{ elapsed, 1 }); + } + var endpoint_buf = std.ArrayList(u8).init(self.allocator); try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); defer endpoint_buf.deinit(); @@ -529,27 +550,25 @@ pub const GossipService = struct { // handle batch messages if (pull_requests.items.len > 0) { - // var 
pull_req_timer = std.time.Timer.start() catch unreachable; - // defer { - // std.debug.print("filter_crds_values elapsed {any} for {any} filters\n", .{ - // pull_req_timer.read(), - // pull_requests.items.len - // }); - // } - + var x_timer = std.time.Timer.start() catch unreachable; + const length = pull_requests.items.len; self.handleBatchPullRequest(pull_requests); + const elapsed = x_timer.read(); + std.debug.print("handle batch pull_req took {} with {} items\n", .{ elapsed, length }); + for (pull_requests.items) |*pr| { pr.filter.deinit(); } - pull_requests.clearRetainingCapacity(); } + pull_requests.clearRetainingCapacity(); { - var table_timer = std.time.Timer.start() catch unreachable; + var x_timer = std.time.Timer.start() catch unreachable; defer { - const elapsed = table_timer.read(); - std.debug.print("crds table trim took {}ns\n", .{elapsed}); + const elapsed = x_timer.read(); + std.debug.print("handle batch crds_trim took {} with {} items\n", .{ elapsed, 1 }); } + var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); @@ -2323,7 +2342,7 @@ pub const BenchmarkGossipServiceGeneral = struct { "10_msg_iters", "100_msg_iters", }; - pub fn benchmarkGossipService(num_message_iterations: usize) !void { + pub fn benchmarkGossipServiceProcessMessages(num_message_iterations: usize) !void { const allocator = std.heap.page_allocator; var keypair = try KeyPair.create(null); var address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index 16b91a147..c8f4c0003 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -73,8 +73,9 @@ pub const BenchmarkPacketProcessing = struct { pub const min_iterations = 3; pub const max_iterations = 5; + const N_ITERS = 100_000; + pub fn benchmarkReadSocket() !void { - const N_ITERS = 10; const allocator = std.heap.page_allocator; var channel = Channel(Packet).init(allocator, N_ITERS); @@ -89,31 +90,31 @@ pub const 
BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); var handle = try std.Thread.spawn(.{}, readSocket, .{ &socket, channel, &exit, .noop }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, N_ITERS }); var rand = std.rand.DefaultPrng.init(0); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - for (0..N_ITERS) |_| { + var timer = std.time.Timer.start() catch unreachable; + for (1..(N_ITERS * 2 + 1)) |i| { rand.fill(&packet_buf); _ = try socket.sendTo(to_endpoint, &packet_buf); - } - - var count: usize = 0; - while (true) { - const values = channel.drain() orelse { - continue; - }; - count += values.len; - if (count == N_ITERS) { - break; + // 10Kb per second + // each packet is 1k bytes + // = 10 packets per second + if (i % 10 == 0) { + const elapsed = timer.read(); + if (elapsed < std.time.ns_per_s) { + std.time.sleep(std.time.ns_per_s - elapsed); + } } } + recv_handle.join(); exit.store(true, std.atomic.Ordering.Unordered); handle.join(); } pub fn benchmarkSendSocket() !void { - const N_ITERS = 10; const allocator = std.heap.page_allocator; var channel = Channel(Packet).init(allocator, N_ITERS); @@ -126,8 +127,9 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); - var handle = try std.Thread.spawn(.{}, sendSocket, .{ &socket, channel, &exit, .noop }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkSocketRecv, .{ &socket, N_ITERS }); + var handle = try std.Thread.spawn(.{}, sendSocket, .{ &socket, channel, &exit, .noop }); var rand = std.rand.DefaultPrng.init(0); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; for (0..N_ITERS) |_| { @@ -139,28 +141,52 @@ pub const BenchmarkPacketProcessing = struct { )); } - var count: usize = 0; - while (true) { - const recv_meta = socket.receiveFrom(&packet_buf) catch |err| { - if (err == error.WouldBlock) { - continue; - } else { - return error.SocketRecvError; - } - }; + recv_handle.join(); + 
exit.store(true, std.atomic.Ordering.Unordered); + handle.join(); + } +}; - const bytes_read = recv_meta.numberOfBytes; - if (bytes_read == 0) { - return error.SocketClosed; - } +pub fn benchmarkChannelRecv( + channel: *Channel(Packet), + N_ITERS: usize, +) !void { + var count: usize = 0; + while (true) { + const values = (try channel.try_drain()) orelse { + continue; + }; + count += values.len; + if (count >= N_ITERS) { + break; + } + } +} + +pub fn benchmarkSocketRecv( + socket: *UdpSocket, + total: usize, +) !void { + var count: usize = 0; + var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - count += 1; - if (count == N_ITERS) { - break; + while (true) { + const recv_meta = socket.receiveFrom(&packet_buf) catch |err| { + if (err == error.WouldBlock) { + continue; + } else { + return error.SocketRecvError; } + }; + + const bytes_read = recv_meta.numberOfBytes; + if (bytes_read == 0) { + return error.SocketClosed; } - exit.store(true, std.atomic.Ordering.Unordered); - handle.join(); + count += 1; + if (count == total) { + break; + } } -}; +} From 6f4fe666d862913af36b89340889c324dbca0b6f Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 26 Sep 2023 12:08:32 -0400 Subject: [PATCH 10/72] remove extra locks in LRU --- src/common/lru.zig | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index 14a7b19dc..755ef688b 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -14,7 +14,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { dbl_link_list: TailQueue(LruEntry), max_items: usize, len: usize = 0, - mux: std.Thread.Mutex, const Self = @This(); @@ -53,7 +52,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { .hashmap = hashmap, .dbl_link_list = TailQueue(LruEntry){}, .max_items = max_items, - .mux = std.Thread.Mutex{}, }; // pre allocate enough capacity for max items since we will use @@ -64,9 +62,6 @@ pub fn LruCache(comptime K: type, comptime V: type) 
type { } pub fn deinit(self: *Self) void { - self.mux.lock(); - defer self.mux.unlock(); - while (self.dbl_link_list.pop()) |node| { self.deinitNode(node); } @@ -117,9 +112,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Inserts key/value if key doesn't exist, updates only value if it does. /// In any case, it will affect cache ordering. pub fn insert(self: *Self, key: K, value: V) error{OutOfMemory}!void { - self.mux.lock(); - defer self.mux.unlock(); - _ = self.internal_insert(key, value); return; } @@ -127,17 +119,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Whether or not contains key. /// NOTE: doesn't affect cache ordering. pub fn contains(self: *Self, key: K) bool { - self.mux.lock(); - defer self.mux.unlock(); - return self.hashmap.contains(key); } /// Most recently used entry pub fn mru(self: *Self) ?LruEntry { - self.mux.lock(); - defer self.mux.unlock(); - if (self.dbl_link_list.last) |node| { return node.data; } @@ -146,9 +132,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Least recently used entry pub fn lru(self: *Self) ?LruEntry { - self.mux.lock(); - defer self.mux.unlock(); - if (self.dbl_link_list.first) |node| { return node.data; } @@ -163,9 +146,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Gets value associated with key if exists pub fn get(self: *Self, key: K) ?V { - self.mux.lock(); - defer self.mux.unlock(); - if (self.hashmap.get(key)) |node| { self.dbl_link_list.remove(node); self.dbl_link_list.append(node); @@ -175,8 +155,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { } pub fn pop(self: *Self, k: K) ?V { - self.mux.lock(); - defer self.mux.unlock(); if (self.hashmap.fetchSwapRemove(k)) |kv| { self.dbl_link_list.remove(kv.value); return kv.value.data.value; @@ -185,8 +163,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { } pub fn peek(self: *Self, key: K) ?V { - self.mux.lock(); - defer self.mux.unlock(); if 
(self.hashmap.get(key)) |node| { return node.data.value; @@ -198,8 +174,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Puts a key-value pair into cache. If the key already exists in the cache, then it updates /// the key's value and returns the old value. Otherwise, `null` is returned. pub fn put(self: *Self, key: K, value: V) ?V { - self.mux.lock(); - defer self.mux.unlock(); if (self.hashmap.getEntry(key)) |existing_entry| { var existing_node: *Node = existing_entry.value_ptr.*; @@ -215,8 +189,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Removes key from cache. Returns true if found, false if not. pub fn remove(self: *Self, key: K) bool { - self.mux.lock(); - defer self.mux.unlock(); if (self.hashmap.fetchSwapRemove(key)) |kv| { var node = kv.value; From cfe39bb55e5c53b3bee80b13ced9c5dd9ef38154 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 26 Sep 2023 14:23:39 -0400 Subject: [PATCH 11/72] add packet batch reading from socket --- src/common/lru.zig | 3 - src/gossip/packet.zig | 11 ++- src/gossip/ping_pong.zig | 2 +- src/gossip/socket_utils.zig | 159 ++++++++++++++++++++++++++++++++++++ src/net/net.zig | 6 ++ 5 files changed, 175 insertions(+), 6 deletions(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index 755ef688b..c01f32090 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -163,7 +163,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { } pub fn peek(self: *Self, key: K) ?V { - if (self.hashmap.get(key)) |node| { return node.data.value; } @@ -174,7 +173,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Puts a key-value pair into cache. If the key already exists in the cache, then it updates /// the key's value and returns the old value. Otherwise, `null` is returned. 
pub fn put(self: *Self, key: K, value: V) ?V { - if (self.hashmap.getEntry(key)) |existing_entry| { var existing_node: *Node = existing_entry.value_ptr.*; var old_value = existing_node.data.value; @@ -189,7 +187,6 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Removes key from cache. Returns true if found, false if not. pub fn remove(self: *Self, key: K) bool { - if (self.hashmap.fetchSwapRemove(key)) |kv| { var node = kv.value; self.dbl_link_list.remove(node); diff --git a/src/gossip/packet.zig b/src/gossip/packet.zig index 8fe8b098a..1d7710c2a 100644 --- a/src/gossip/packet.zig +++ b/src/gossip/packet.zig @@ -14,11 +14,18 @@ pub const Packet = struct { const Self = @This(); pub fn init(addr: network.EndPoint, data: [PACKET_DATA_SIZE]u8, size: usize) Self { - var self = Self{ + return .{ .addr = addr, .data = data, .size = size, }; - return self; + } + + pub fn default() Self { + return .{ + .addr = network.EndPoint.default(), + .data = undefined, + .size = 0, + }; } }; diff --git a/src/gossip/ping_pong.zig b/src/gossip/ping_pong.zig index 1c69e5248..9c7c7784b 100644 --- a/src/gossip/ping_pong.zig +++ b/src/gossip/ping_pong.zig @@ -149,7 +149,7 @@ pub const PingCache = struct { /// Records a `Pong` if corresponding `Ping` exists in `pending_cache` pub fn receviedPong(self: *Self, pong: *const Pong, socket: SocketAddr, now: Instant) bool { var peer_and_addr = newPubkeyAndSocketAddr(pong.from, socket); - if (self.pending_cache.peek(pong.hash)) |pubkey_and_addr| { + if (self.pending_cache.peek(pong.hash)) |*pubkey_and_addr| { const pubkey: Pubkey = pubkey_and_addr[0]; const addr: SocketAddr = pubkey_and_addr[1]; if (pubkey.equals(&pong.from) and addr.eql(&socket)) { diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index c8f4c0003..12f181921 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -5,6 +5,9 @@ const Channel = @import("../sync/channel.zig").Channel; const std = @import("std"); const Logger = 
@import("../trace/log.zig").Logger; +pub const SOCKET_TIMEOUT: usize = 1000000; +pub const PACKETS_PER_BATCH: usize = 64; + pub fn readSocket( socket: *UdpSocket, incoming_channel: *Channel(Packet), @@ -39,6 +42,103 @@ pub fn readSocket( logger.debugf("read_socket loop closed\n", .{}); } +pub fn readSocketV2( + allocator: std.mem.Allocator, + socket: *UdpSocket, + incoming_channel: *Channel(std.ArrayList(Packet)), + exit: *const std.atomic.Atomic(bool), + // logger: Logger, +) !void { + //Performance out of the IO without poll + // * block on the socket until it's readable + // * set the socket to non blocking + // * read until it fails + // * set it back to blocking before returning + + const MAX_WAIT_NS = std.time.ns_per_ms; // 1ms + + while (!exit.load(std.atomic.Ordering.Unordered)) { + // init a new batch + var count: usize = 0; + const capacity = PACKETS_PER_BATCH; + var packet_batch = try std.ArrayList(Packet).initCapacity( + allocator, + capacity, + ); + for (0..capacity) |_| { + packet_batch.appendAssumeCapacity(Packet.default()); + } + + // set socket to block + try socket.setReadTimeout(null); + var timer = std.time.Timer.start() catch unreachable; + + // recv packets into batch + while (true) { + var n_packets_read = recvMmsg(socket, packet_batch.items[count..capacity]) catch |err| { + if (count > 0 and err == error.WouldBlock) { + if (timer.read() > MAX_WAIT_NS) { + break; + } + continue; + } else { + return err; + } + }; + + if (count == 0) { + // set to nonblocking mode + try socket.setReadTimeout(SOCKET_TIMEOUT); + } + count += n_packets_read; + if (timer.read() > MAX_WAIT_NS or count >= capacity) { + break; + } + } + + if (count < capacity) { + packet_batch.shrinkAndFree(count); + } + try incoming_channel.send(packet_batch); + } +} + +pub fn recvMmsg( + socket: *UdpSocket, + /// pre-allocated array of packets to fill up + packet_batch: []Packet, +) !usize { + const max_size = packet_batch.len; + var count: usize = 0; + + while (count < max_size) { 
+ var packet = &packet_batch[count]; + const recv_meta = socket.receiveFrom(&packet.data) catch |err| { + // would block then return + if (count > 0 and err == error.WouldBlock) { + break; + } else { + return err; + } + }; + + const bytes_read = recv_meta.numberOfBytes; + if (bytes_read == 0) { + return error.SocketClosed; + } + packet.addr = recv_meta.sender; + packet.size = bytes_read; + + if (count == 0) { + // nonblocking mode + try socket.setReadTimeout(SOCKET_TIMEOUT); + } + count += 1; + } + + return count; +} + pub fn sendSocket( socket: *UdpSocket, outgoing_channel: *Channel(Packet), @@ -114,6 +214,47 @@ pub const BenchmarkPacketProcessing = struct { handle.join(); } + pub fn benchmarkReadSocketV2() !void { + const allocator = std.heap.page_allocator; + + var channel = Channel(std.ArrayList(Packet)).init(allocator, N_ITERS); + defer channel.deinit(); + + var socket = try UdpSocket.create(.ipv4, .udp); + try socket.bindToPort(0); + try socket.setReadTimeout(1000000); // 1 second + + const to_endpoint = try socket.getLocalEndPoint(); + + var exit = std.atomic.Atomic(bool).init(false); + + var handle = try std.Thread.spawn(.{}, readSocketV2, .{ allocator, &socket, channel, &exit }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecvV2, .{ channel, N_ITERS }); + + var rand = std.rand.DefaultPrng.init(0); + var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; + var timer = std.time.Timer.start() catch unreachable; + + for (1..(N_ITERS * 2 + 1)) |i| { + rand.fill(&packet_buf); + _ = try socket.sendTo(to_endpoint, &packet_buf); + + // 10Kb per second + // each packet is 1k bytes + // = 10 packets per second + if (i % 10 == 0) { + const elapsed = timer.read(); + if (elapsed < std.time.ns_per_s) { + std.time.sleep(std.time.ns_per_s - elapsed); + } + } + } + + recv_handle.join(); + exit.store(true, std.atomic.Ordering.Unordered); + handle.join(); + } + pub fn benchmarkSendSocket() !void { const allocator = std.heap.page_allocator; @@ -147,6 +288,24 @@ 
pub const BenchmarkPacketProcessing = struct { } }; +pub fn benchmarkChannelRecvV2( + channel: *Channel(std.ArrayList(Packet)), + n_values_to_receive: usize, +) !void { + var count: usize = 0; + while (true) { + const values = (try channel.try_drain()) orelse { + continue; + }; + for (values) |packet_batch| { + count += packet_batch.items.len; + } + if (count >= n_values_to_receive) { + break; + } + } +} + pub fn benchmarkChannelRecv( channel: *Channel(Packet), N_ITERS: usize, diff --git a/src/net/net.zig b/src/net/net.zig index fa744e91d..b768f91f4 100644 --- a/src/net/net.zig +++ b/src/net/net.zig @@ -306,6 +306,12 @@ pub const IpAddr = union(enum(u32)) { } }; +pub fn endpointToString(allocator: std.mem.Allocator, endpoint: *const network.EndPoint) error{OutOfMemory}!std.ArrayList(u8) { + var endpoint_buf = try std.ArrayList(u8).initCapacity(allocator, 14); + try endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); + return endpoint_buf; +} + test "gossip.net: invalid ipv4 socket parsing" { { var addr = "127.0.0.11234"; From 7c7eeea1e908ca50328149f5cfa7df9a5fafee44 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 26 Sep 2023 14:31:18 -0400 Subject: [PATCH 12/72] fix socket benchmarks --- src/gossip/socket_utils.zig | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index 12f181921..9599c1bc3 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -173,12 +173,18 @@ pub const BenchmarkPacketProcessing = struct { pub const min_iterations = 3; pub const max_iterations = 5; - const N_ITERS = 100_000; + pub const args = [_]usize{ + 100_000, + }; - pub fn benchmarkReadSocket() !void { + pub const arg_names = [_][]const u8{ + "100k_msgs", + }; + + pub fn benchmarkReadSocket(n_packets: usize) !void { const allocator = std.heap.page_allocator; - var channel = Channel(Packet).init(allocator, 
N_ITERS); + var channel = Channel(Packet).init(allocator, n_packets); defer channel.deinit(); var socket = try UdpSocket.create(.ipv4, .udp); @@ -190,12 +196,12 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); var handle = try std.Thread.spawn(.{}, readSocket, .{ &socket, channel, &exit, .noop }); - var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, N_ITERS }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, n_packets }); var rand = std.rand.DefaultPrng.init(0); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; var timer = std.time.Timer.start() catch unreachable; - for (1..(N_ITERS * 2 + 1)) |i| { + for (1..(n_packets * 2 + 1)) |i| { rand.fill(&packet_buf); _ = try socket.sendTo(to_endpoint, &packet_buf); // 10Kb per second @@ -214,10 +220,10 @@ pub const BenchmarkPacketProcessing = struct { handle.join(); } - pub fn benchmarkReadSocketV2() !void { + pub fn benchmarkReadSocketV2(n_packets: usize) !void { const allocator = std.heap.page_allocator; - var channel = Channel(std.ArrayList(Packet)).init(allocator, N_ITERS); + var channel = Channel(std.ArrayList(Packet)).init(allocator, n_packets); defer channel.deinit(); var socket = try UdpSocket.create(.ipv4, .udp); @@ -229,13 +235,13 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); var handle = try std.Thread.spawn(.{}, readSocketV2, .{ allocator, &socket, channel, &exit }); - var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecvV2, .{ channel, N_ITERS }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecvV2, .{ channel, n_packets }); var rand = std.rand.DefaultPrng.init(0); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; var timer = std.time.Timer.start() catch unreachable; - for (1..(N_ITERS * 2 + 1)) |i| { + for (1..(n_packets * 2 + 1)) |i| { rand.fill(&packet_buf); _ = try socket.sendTo(to_endpoint, &packet_buf); @@ -255,10 
+261,10 @@ pub const BenchmarkPacketProcessing = struct { handle.join(); } - pub fn benchmarkSendSocket() !void { + pub fn benchmarkSendSocket(n_packets: usize) !void { const allocator = std.heap.page_allocator; - var channel = Channel(Packet).init(allocator, N_ITERS); + var channel = Channel(Packet).init(allocator, n_packets); defer channel.deinit(); var socket = try UdpSocket.create(.ipv4, .udp); @@ -268,12 +274,12 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); - var recv_handle = try std.Thread.spawn(.{}, benchmarkSocketRecv, .{ &socket, N_ITERS }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkSocketRecv, .{ &socket, n_packets }); var handle = try std.Thread.spawn(.{}, sendSocket, .{ &socket, channel, &exit, .noop }); var rand = std.rand.DefaultPrng.init(0); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - for (0..N_ITERS) |_| { + for (0..n_packets) |_| { rand.fill(&packet_buf); try channel.send(Packet.init( to_endpoint, @@ -308,7 +314,7 @@ pub fn benchmarkChannelRecvV2( pub fn benchmarkChannelRecv( channel: *Channel(Packet), - N_ITERS: usize, + n_values_to_receive: usize, ) !void { var count: usize = 0; while (true) { @@ -316,7 +322,7 @@ pub fn benchmarkChannelRecv( continue; }; count += values.len; - if (count >= N_ITERS) { + if (count >= n_values_to_receive) { break; } } From f5bac41cac2dc0c93ab811da38095f346025b1e0 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 26 Sep 2023 14:45:16 -0400 Subject: [PATCH 13/72] gossip benchmarks OG baseline --- src/gossip/gossip_service.zig | 142 ++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 67 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 6f3a73661..b6430428e 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -14,6 +14,7 @@ const AtomicBool = std.atomic.Atomic(bool); const UdpSocket = network.Socket; const Tuple = std.meta.Tuple; const 
SocketAddr = @import("../net/net.zig").SocketAddr; +const endpointToString = @import("../net/net.zig").endpointToString; const _protocol = @import("protocol.zig"); const Protocol = _protocol.Protocol; const PruneData = _protocol.PruneData; @@ -111,9 +112,9 @@ pub const GossipService = struct { entrypoints: std.ArrayList(SocketAddr), ping_cache_rw: RwMux(PingCache), - echo_server: echo.Server, logger: Logger, thread_pool: *ThreadPool, + echo_server: ?echo.Server, // used for benchmarking messages_processed: std.atomic.Atomic(usize) = std.atomic.Atomic(usize).init(0), @@ -144,7 +145,7 @@ pub const GossipService = struct { .max_threads = n_threads, .stack_size = 2 * 1024 * 1024, }); - std.debug.print("using n_threads in gossip: {}\n", .{n_threads}); + logger.debugf("using n_threads in gossip: {}\n", .{n_threads}); var crds_table = try CrdsTable.init(allocator, thread_pool); errdefer crds_table.deinit(); @@ -157,11 +158,12 @@ pub const GossipService = struct { const gossip_address = my_contact_info.gossip; var gossip_socket = UdpSocket.create(.ipv4, .udp) catch return error.SocketCreateFailed; gossip_socket.bindToPort(gossip_address.port()) catch return error.SocketBindFailed; - gossip_socket.setReadTimeout(1000000) catch return error.SocketSetTimeoutFailed; // 1 second + gossip_socket.setReadTimeout(socket_utils.SOCKET_TIMEOUT) catch return error.SocketSetTimeoutFailed; // 1 second var failed_pull_hashes = HashTimeQueue.init(allocator); var push_msg_q = std.ArrayList(CrdsValue).init(allocator); - var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); + + // var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); return Self{ .my_contact_info = my_contact_info, @@ -187,7 +189,7 @@ pub const GossipService = struct { GOSSIP_PING_CACHE_CAPACITY, ), ), - .echo_server = echo_server, + .echo_server = null, .logger = logger, .thread_pool = thread_pool, }; @@ -206,8 +208,10 @@ pub const GossipService = 
struct { } pub fn deinit(self: *Self) void { - self.echo_server.deinit(); + // self.echo_server.deinit(); + self.gossip_socket.close(); + self.packet_incoming_channel.deinit(); self.packet_outgoing_channel.deinit(); self.verified_incoming_channel.deinit(); @@ -269,8 +273,8 @@ pub const GossipService = struct { } pub fn runSpy(self: *Self) !void { - var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); - defer self.joinAndExit(&ip_echo_server_listener_handle); + // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); + // defer self.joinAndExit(&ip_echo_server_listener_handle); var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ &self.gossip_socket, @@ -385,7 +389,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - std.debug.print("handle batch push took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch push took {} with {} items\n", .{ elapsed, 1 }); } const push_from: Pubkey = push[0]; @@ -425,7 +429,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - std.debug.print("handle batch pull_resp took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch pull_resp took {} with {} items\n", .{ elapsed, 1 }); } const from: Pubkey = pull[0]; @@ -469,12 +473,11 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - std.debug.print("handle batch prune took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch prune took {} with {} items\n", .{ elapsed, 1 }); } const prune_msg: PruneData = prune[1]; - var endpoint_buf = std.ArrayList(u8).init(self.allocator); - try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, 
endpoint_buf.writer()); + var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); defer endpoint_buf.deinit(); var prune_log_entry = self.logger @@ -482,9 +485,7 @@ pub const GossipService = struct { .field("from_pubkey", &prune_msg.pubkey.string()) .field("num_prunes", prune_msg.prunes.len); - self.handlePruneMessage( - &prune_msg, - ) catch |err| { + self.handlePruneMessage(&prune_msg) catch |err| { prune_log_entry.field("error", @errorName(err)) .err("error handling prune message"); continue; @@ -496,11 +497,10 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - std.debug.print("handle batch ping took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch ping took {} with {} items\n", .{ elapsed, 1 }); } - var endpoint_buf = std.ArrayList(u8).init(self.allocator); - try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); + var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); defer endpoint_buf.deinit(); var ping_log_entry = self.logger @@ -524,20 +524,24 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - std.debug.print("handle batch pong took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch pong took {} with {} items\n", .{ elapsed, 1 }); } - var endpoint_buf = std.ArrayList(u8).init(self.allocator); - try from_endpoint.format(&[_]u8{}, std.fmt.FormatOptions{}, endpoint_buf.writer()); + var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); defer endpoint_buf.deinit(); { + const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); + var ping_cache_lock = self.ping_cache_rw.write(); defer ping_cache_lock.unlock(); - var ping_cache: *PingCache = ping_cache_lock.mut(); - const now = std.time.Instant.now() catch @panic("time is not supported on the 
OS!"); - _ = ping_cache.receviedPong(pong, SocketAddr.fromEndpoint(&from_endpoint), now); + + _ = ping_cache.receviedPong( + pong, + SocketAddr.fromEndpoint(&from_endpoint), + now, + ); } self.logger @@ -554,7 +558,7 @@ pub const GossipService = struct { const length = pull_requests.items.len; self.handleBatchPullRequest(pull_requests); const elapsed = x_timer.read(); - std.debug.print("handle batch pull_req took {} with {} items\n", .{ elapsed, length }); + self.logger.debugf("handle batch pull_req took {} with {} items\n", .{ elapsed, length }); for (pull_requests.items) |*pr| { pr.filter.deinit(); @@ -566,7 +570,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - std.debug.print("handle batch crds_trim took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch crds_trim took {} with {} items\n", .{ elapsed, 1 }); } var crds_table_lock = self.crds_table_rw.write(); @@ -579,8 +583,8 @@ pub const GossipService = struct { } const elapsed = timer.read(); - std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); - self.messages_processed.store(msg_count, std.atomic.Ordering.Unordered); + self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + self.messages_processed.store(msg_count, std.atomic.Ordering.Release); } self.logger.debugf("process_messages loop closed\n", .{}); @@ -2314,38 +2318,27 @@ test "gossip.gossip_service: init, exit, and deinit" { const fuzz = @import("./fuzz.zig"); -const Sender = struct { - const Self = @This(); - - gs: *GossipService, - to_endpoint: EndPoint, - - pub fn send(self: *Self, msg: Protocol) void { - self.gs.verified_incoming_channel.send(ProtocolMessage{ - .message = msg, - .from_endpoint = self.to_endpoint, - }) catch @panic("ahhhh"); - } -}; - pub const BenchmarkGossipServiceGeneral = struct { pub const min_iterations = 1; - pub const max_iterations = 5; + pub const max_iterations = 
3; - // TODO: bigger values ltr pub const args = [_]usize{ - 10, - 100, + 1_000, + 5_000, + 10_000, }; pub const arg_names = [_][]const u8{ - "10_msg_iters", "100_msg_iters", + "1k_msgs", + "5k_msgs", + "10k_msg_iters", }; pub fn benchmarkGossipServiceProcessMessages(num_message_iterations: usize) !void { const allocator = std.heap.page_allocator; var keypair = try KeyPair.create(null); - var address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); + var address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 8888); + var endpoint = address.toEndpoint(); var pubkey = Pubkey.fromPublicKey(&keypair.public_key, false); var contact_info = crds.LegacyContactInfo.default(pubkey); @@ -2357,6 +2350,7 @@ pub const BenchmarkGossipServiceGeneral = struct { // logger.spawn(); var logger: Logger = .noop; + // process incoming packets/messsages var exit = AtomicBool.init(false); var gossip_service = try GossipService.init( allocator, @@ -2368,30 +2362,45 @@ pub const BenchmarkGossipServiceGeneral = struct { ); defer gossip_service.deinit(); - var packet_handle = try Thread.spawn(.{}, GossipService.processMessages, .{ + var packet_handle = try Thread.spawn(.{}, GossipService.runSpy, .{ &gossip_service, }); + // send incomign packets/messages + var outgoing_channel = Channel(Packet).init(allocator, 10_000); + defer outgoing_channel.deinit(); + + var socket = UdpSocket.create(.ipv4, .udp) catch return error.SocketCreateFailed; + socket.bindToPort(8889) catch return error.SocketBindFailed; + socket.setReadTimeout(1000000) catch return error.SocketSetTimeoutFailed; // 1 second + defer { + socket.close(); + } + + var outgoing_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ + &socket, + outgoing_channel, + &exit, + logger, + }); + + // generate messages var rand = std.rand.DefaultPrng.init(19); var rng = rand.random(); - var sender = Sender{ - .gs = &gossip_service, - .to_endpoint = address.toEndpoint(), - }; var sender_keypair = try KeyPair.create(null); var msg_sent: usize = 0; - for 
(0..num_message_iterations) |_| { + while (msg_sent < num_message_iterations) { // send a ping message { - var msg = try fuzz.randomPing(rng, &keypair); - sender.send(msg); + var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); + try outgoing_channel.send(msg); msg_sent += 1; } // send a pong message { - var msg = try fuzz.randomPong(rng, &keypair); - sender.send(msg); + var msg = try fuzz.randomPongPacket(rng, &keypair, endpoint); + try outgoing_channel.send(msg); msg_sent += 1; } // send a push message @@ -2400,8 +2409,7 @@ pub const BenchmarkGossipServiceGeneral = struct { defer packets.deinit(); for (packets.items) |packet| { - var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); - sender.send(msg); + try outgoing_channel.send(packet); msg_sent += 1; } } @@ -2411,29 +2419,29 @@ pub const BenchmarkGossipServiceGeneral = struct { defer packets.deinit(); for (packets.items) |packet| { - var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); - sender.send(msg); + try outgoing_channel.send(packet); msg_sent += 1; } } // send a pull request { var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); - var msg = try bincode.readFromSlice(allocator, Protocol, packet.data[0..packet.size], bincode.Params{}); - sender.send(msg); + try outgoing_channel.send(packet); msg_sent += 1; } } + // wait for all messages to be processed while (true) { - const v = gossip_service.messages_processed.load(std.atomic.Ordering.Unordered); - if (v == msg_sent) { + const v = gossip_service.messages_processed.load(std.atomic.Ordering.Acquire); + if (v >= msg_sent) { break; } } exit.store(true, std.atomic.Ordering.Unordered); packet_handle.join(); + outgoing_handle.join(); } }; From e51f094faff94a24334666f1c026609f46a7aa98 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 27 Sep 2023 09:02:15 -0400 Subject: [PATCH 14/72] packet 
batch impl + parralel verification --- src/cmd/cmd.zig | 9 +- src/gossip/gossip_service.zig | 162 +++++++++++++++++++++++++--------- src/gossip/socket_utils.zig | 20 ++--- 3 files changed, 134 insertions(+), 57 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 64ec4ee2a..e5505f18a 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -70,10 +70,11 @@ fn identity(_: []const []const u8) !void { // gossip entrypoint fn gossip(_: []const []const u8) !void { - // var logger = Logger.init(gpa_allocator, .debug); - // defer logger.deinit(); - // logger.spawn(); - var logger: Logger = .noop; + var logger = Logger.init(gpa_allocator, .debug); + defer logger.deinit(); + logger.spawn(); + + // var logger: Logger = .noop; var my_keypair = try getOrInitIdentity(gpa_allocator, logger); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index b6430428e..8e315e073 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -55,6 +55,8 @@ const socket_utils = @import("socket_utils.zig"); const Channel = @import("../sync/channel.zig").Channel; const PacketChannel = Channel(Packet); +const PacketBatchChannel = Channel(std.ArrayList(Packet)); + const ProtocolMessage = struct { from_endpoint: EndPoint, message: Protocol }; const ProtocolChannel = Channel(ProtocolMessage); const PingCache = @import("./ping_pong.zig").PingCache; @@ -99,7 +101,7 @@ pub const GossipService = struct { exit: *AtomicBool, // communication between threads - packet_incoming_channel: *PacketChannel, + packet_incoming_channel: *PacketBatchChannel, packet_outgoing_channel: *PacketChannel, verified_incoming_channel: *ProtocolChannel, @@ -129,7 +131,7 @@ pub const GossipService = struct { exit: *AtomicBool, logger: Logger, ) error{ OutOfMemory, SocketCreateFailed, SocketBindFailed, SocketSetTimeoutFailed }!Self { - var packet_incoming_channel = PacketChannel.init(allocator, 10000); + var packet_incoming_channel = PacketBatchChannel.init(allocator, 10000); 
var packet_outgoing_channel = PacketChannel.init(allocator, 10000); var verified_incoming_channel = ProtocolChannel.init(allocator, 10000); @@ -242,14 +244,21 @@ pub const GossipService = struct { /// 4) build message loop (to send outgoing message) /// and 5) a socket responder (to send outgoing packets) pub fn run(self: *Self) !void { - var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); - defer self.joinAndExit(&ip_echo_server_listener_handle); + // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); + // defer self.joinAndExit(&ip_echo_server_listener_handle); - var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ + // var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ + // &self.gossip_socket, + // self.packet_incoming_channel, + // self.exit, + // self.logger, + // }); + + var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocketV2, .{ + self.allocator, &self.gossip_socket, self.packet_incoming_channel, self.exit, - self.logger, }); defer self.joinAndExit(&receiver_handle); @@ -276,11 +285,19 @@ pub const GossipService = struct { // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); // defer self.joinAndExit(&ip_echo_server_listener_handle); - var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ + // var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ + // &self.gossip_socket, + // self.packet_incoming_channel, + // self.exit, + // self.logger, + // }); + // defer self.joinAndExit(&receiver_handle); + + var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocketV2, .{ + self.allocator, &self.gossip_socket, self.packet_incoming_channel, self.exit, - self.logger, }); defer self.joinAndExit(&receiver_handle); @@ -300,11 +317,62 @@ pub const GossipService = struct { defer 
self.joinAndExit(&responder_handle); } + const VerifyMessageTask = struct { + packet: *const Packet, + allocator: std.mem.Allocator, + verified_incoming_channel: *Channel(ProtocolMessage), + + task: Task, + done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + + pub fn callback(task: *Task) void { + var this = @fieldParentPtr(@This(), "task", task); + defer this.done.store(true, std.atomic.Ordering.Release); + + var protocol_message = bincode.readFromSlice( + this.allocator, + Protocol, + this.packet.data[0..this.packet.size], + bincode.Params.standard, + ) catch { + return; + }; + + protocol_message.sanitize() catch { + bincode.free(this.allocator, protocol_message); + return; + }; + + protocol_message.verifySignature() catch { + bincode.free(this.allocator, protocol_message); + return; + }; + + const msg = ProtocolMessage{ + .from_endpoint = this.packet.addr, + .message = protocol_message, + }; + this.verified_incoming_channel.send(msg) catch unreachable; + } + }; + /// main logic for deserializing Packets into Protocol messages /// and verifing they have valid values, and have valid signatures. /// Verified Protocol messages are then sent to the verified_channel. 
fn verifyPackets(self: *Self) !void { - var failed_protocol_msgs: usize = 0; + var tasks: [socket_utils.PACKETS_PER_BATCH]*VerifyMessageTask = undefined; + // pre-allocate all the tasks + for (0..tasks.len) |i| { + const verify_task = VerifyMessageTask{ + .task = .{ .callback = VerifyMessageTask.callback }, + .allocator = self.allocator, + .verified_incoming_channel = self.verified_incoming_channel, + .packet = &Packet.default(), + }; + var verify_task_heap = try self.allocator.create(VerifyMessageTask); + verify_task_heap.* = verify_task; + tasks[i] = verify_task_heap; + } while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_packets = try self.packet_incoming_channel.try_drain(); @@ -314,36 +382,32 @@ pub const GossipService = struct { continue; } - const packets = maybe_packets.?; - defer self.packet_incoming_channel.allocator.free(packets); - - for (packets) |*packet| { - var protocol_message = bincode.readFromSlice( - self.allocator, - Protocol, - packet.data[0..packet.size], - bincode.Params.standard, - ) catch { - failed_protocol_msgs += 1; - self.logger.debugf("failed to deserialize protocol message: {d}\n", .{std.mem.readIntLittle(u32, packet.data[0..4])}); - continue; - }; + const packet_batches = maybe_packets.?; + defer self.packet_incoming_channel.allocator.free(packet_batches); + defer { + for (packet_batches) |*packet_batch| { + packet_batch.deinit(); + } + } - protocol_message.sanitize() catch |err| { - self.logger.debugf("failed to sanitize protocol message: {s}\n", .{@errorName(err)}); - bincode.free(self.allocator, protocol_message); - continue; - }; + // verify in parallel using the threadpool + var count: usize = 0; + for (packet_batches) |*packet_batch| { + for (packet_batch.items) |*packet| { + var task = tasks[count]; + task.packet = packet; + const batch = Batch.from(&task.task); + ThreadPool.schedule(self.thread_pool, batch); - protocol_message.verifySignature() catch |err| { - self.logger.debugf("failed to verify protocol 
message signature {s}\n", .{@errorName(err)}); - bincode.free(self.allocator, protocol_message); - continue; - }; + count += 1; + } + } - // TODO: send the pointers over the channel (similar to PinnedVec) vs item copy - const msg = ProtocolMessage{ .from_endpoint = packet.addr, .message = protocol_message }; - try self.verified_incoming_channel.send(msg); + for (tasks[0..count]) |task| { + while (!task.done.load(std.atomic.Ordering.Acquire)) { + // wait + } + task.done.store(false, std.atomic.Ordering.Release); } } @@ -2117,10 +2181,13 @@ test "gossip.gossip_service: test packet verification" { var buf = [_]u8{0} ** PACKET_DATA_SIZE; var out = try bincode.writeToSlice(buf[0..], protocol_msg, bincode.Params{}); var packet = Packet.init(from, buf, out.len); - + var packet_batch = std.ArrayList(Packet).init(allocator); for (0..3) |_| { - try packet_channel.send(packet); + try packet_batch.append(packet); } + try packet_channel.send(packet_batch); + + var packet_batch_2 = std.ArrayList(Packet).init(allocator); // send one which fails sanitization var value_v2 = try CrdsValue.initSigned(crds.CrdsData.randomFromIndex(rng.random(), 2), &keypair); @@ -2132,7 +2199,7 @@ test "gossip.gossip_service: test packet verification" { var buf_v2 = [_]u8{0} ** PACKET_DATA_SIZE; var out_v2 = try bincode.writeToSlice(buf_v2[0..], protocol_msg_v2, bincode.Params{}); var packet_v2 = Packet.init(from, buf_v2, out_v2.len); - try packet_channel.send(packet_v2); + try packet_batch_2.append(packet_v2); // send one with a incorrect signature var rand_keypair = try KeyPair.create([_]u8{3} ** 32); @@ -2144,7 +2211,7 @@ test "gossip.gossip_service: test packet verification" { var buf2 = [_]u8{0} ** PACKET_DATA_SIZE; var out2 = try bincode.writeToSlice(buf2[0..], protocol_msg2, bincode.Params{}); var packet2 = Packet.init(from, buf2, out2.len); - try packet_channel.send(packet2); + try packet_batch_2.append(packet2); // send it with a CrdsValue which hash a slice { @@ -2164,8 +2231,9 @@ test 
"gossip.gossip_service: test packet verification" { var buf3 = [_]u8{0} ** PACKET_DATA_SIZE; var out3 = try bincode.writeToSlice(buf3[0..], protocol_msg3, bincode.Params{}); var packet3 = Packet.init(from, buf3, out3.len); - try packet_channel.send(packet3); + try packet_batch_2.append(packet3); } + try packet_channel.send(packet_batch_2); var msg_count: usize = 0; while (msg_count < 4) { @@ -2310,7 +2378,7 @@ test "gossip.gossip_service: init, exit, and deinit" { .{&gossip_service}, ); - gossip_service.echo_server.kill(); + // gossip_service.echo_server.kill(); exit.store(true, std.atomic.Ordering.Unordered); handle.join(); gossip_service.deinit(); @@ -2377,10 +2445,11 @@ pub const BenchmarkGossipServiceGeneral = struct { socket.close(); } + var sender_exit = AtomicBool.init(false); var outgoing_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ &socket, outgoing_channel, - &exit, + &sender_exit, logger, }); @@ -2440,7 +2509,14 @@ pub const BenchmarkGossipServiceGeneral = struct { } exit.store(true, std.atomic.Ordering.Unordered); + // send a few more to make sure the socket exits + for (0..5) |_| { + var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); + try outgoing_channel.send(msg); + } packet_handle.join(); + + sender_exit.store(true, std.atomic.Ordering.Unordered); outgoing_handle.join(); } }; diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index 9599c1bc3..101c7143c 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -65,7 +65,7 @@ pub fn readSocketV2( allocator, capacity, ); - for (0..capacity) |_| { + for (0..capacity) |_| { packet_batch.appendAssumeCapacity(Packet.default()); } @@ -74,29 +74,29 @@ pub fn readSocketV2( var timer = std.time.Timer.start() catch unreachable; // recv packets into batch - while (true) { - var n_packets_read = recvMmsg(socket, packet_batch.items[count..capacity]) catch |err| { - if (count > 0 and err == error.WouldBlock) { - if (timer.read() > MAX_WAIT_NS) { + 
while (true) { + var n_packets_read = recvMmsg(socket, packet_batch.items[count..capacity]) catch |err| { + if (count > 0 and err == error.WouldBlock) { + if (timer.read() > MAX_WAIT_NS) { break; } continue; - } else { + } else { return err; } }; - if (count == 0) { + if (count == 0) { // set to nonblocking mode try socket.setReadTimeout(SOCKET_TIMEOUT); } count += n_packets_read; - if (timer.read() > MAX_WAIT_NS or count >= capacity) { + if (timer.read() > MAX_WAIT_NS or count >= capacity) { break; } } - if (count < capacity) { + if (count < capacity) { packet_batch.shrinkAndFree(count); } try incoming_channel.send(packet_batch); @@ -303,7 +303,7 @@ pub fn benchmarkChannelRecvV2( const values = (try channel.try_drain()) orelse { continue; }; - for (values) |packet_batch| { + for (values) |packet_batch| { count += packet_batch.items.len; } if (count >= n_values_to_receive) { From 864df7d99e607cb5ce3bf0af1ceb57a14ddf2d26 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 27 Sep 2023 13:34:49 -0400 Subject: [PATCH 15/72] packet batch responses --- src/gossip/fuzz.zig | 11 +- src/gossip/gossip_service.zig | 256 ++++++++++++++++++++++------------ src/gossip/socket_utils.zig | 37 +++++ 3 files changed, 212 insertions(+), 92 deletions(-) diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 476af3fc0..52e88486b 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -378,15 +378,20 @@ pub fn main() !void { continue; }; - try gossip_service_fuzzer.packet_outgoing_channel.send(send_packet); + // batch it + var packet_batch = std.ArrayList(Packet).init(allocator); + try packet_batch.append(send_packet); + msg_count +|= 1; var send_duplicate = rng.random().boolean(); if (send_duplicate) { msg_count +|= 1; - try gossip_service_fuzzer.packet_outgoing_channel.send(send_packet); + try packet_batch.append(send_packet); } - msg_count +|= 1; + // send it + try gossip_service_fuzzer.packet_outgoing_channel.send(packet_batch); + 
std.time.sleep(SLEEP_TIME); if (msg_count % 1000 == 0) { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 8e315e073..c7b28c3a2 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -102,7 +102,7 @@ pub const GossipService = struct { // communication between threads packet_incoming_channel: *PacketBatchChannel, - packet_outgoing_channel: *PacketChannel, + packet_outgoing_channel: *PacketBatchChannel, verified_incoming_channel: *ProtocolChannel, crds_table_rw: RwMux(CrdsTable), @@ -132,7 +132,7 @@ pub const GossipService = struct { logger: Logger, ) error{ OutOfMemory, SocketCreateFailed, SocketBindFailed, SocketSetTimeoutFailed }!Self { var packet_incoming_channel = PacketBatchChannel.init(allocator, 10000); - var packet_outgoing_channel = PacketChannel.init(allocator, 10000); + var packet_outgoing_channel = PacketBatchChannel.init(allocator, 10000); var verified_incoming_channel = ProtocolChannel.init(allocator, 10000); errdefer { @@ -272,7 +272,8 @@ pub const GossipService = struct { defer self.joinAndExit(&build_messages_handle); // outputer thread - var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ + // var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ + var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocketV2, .{ &self.gossip_socket, self.packet_outgoing_channel, self.exit, @@ -308,7 +309,8 @@ pub const GossipService = struct { defer self.joinAndExit(&packet_handle); // outputer thread - var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ + // var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ + var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocketV2, .{ &self.gossip_socket, self.packet_outgoing_channel, self.exit, @@ -362,7 +364,7 @@ pub const GossipService = struct { fn verifyPackets(self: *Self) !void { var tasks: [socket_utils.PACKETS_PER_BATCH]*VerifyMessageTask = undefined; // 
pre-allocate all the tasks - for (0..tasks.len) |i| { + for (0..tasks.len) |i| { const verify_task = VerifyMessageTask{ .task = .{ .callback = VerifyMessageTask.callback }, .allocator = self.allocator, @@ -383,11 +385,11 @@ pub const GossipService = struct { } const packet_batches = maybe_packets.?; - defer self.packet_incoming_channel.allocator.free(packet_batches); - defer { + defer { for (packet_batches) |*packet_batch| { packet_batch.deinit(); } + self.packet_incoming_channel.allocator.free(packet_batches); } // verify in parallel using the threadpool @@ -396,6 +398,7 @@ pub const GossipService = struct { for (packet_batch.items) |*packet| { var task = tasks[count]; task.packet = packet; + const batch = Batch.from(&task.task); ThreadPool.schedule(self.thread_pool, batch); @@ -420,14 +423,36 @@ pub const GossipService = struct { from_endpoint: EndPoint, }; + pub const PongMessage = struct { + pong: *Pong, + from_endpoint: *EndPoint, + }; + + pub const PingMessage = struct { + ping: *Ping, + from_endpoint: *EndPoint, + }; + /// main logic for recieving and processing `Protocol` messages. 
pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; var msg_count: usize = 0; + const init_message_size = socket_utils.PACKETS_PER_BATCH; - var pull_requests = try std.ArrayList(PullRequestMessage).initCapacity(self.allocator, 100); + // // batching messages can lead to 1) less lock contention and 2) use of packetbatch which + // // are pre-allocated packets for responses 3) processing messages in parallel + // batch so we can process in parallel + var pull_requests = try std.ArrayList(PullRequestMessage).initCapacity(self.allocator, init_message_size); defer pull_requests.deinit(); + // batch so we can reduce the ping_cache locks + var pong_messages = try std.ArrayList(PongMessage).initCapacity(self.allocator, init_message_size); + defer pong_messages.deinit(); + + // batch so we can respond with a packet batch + var ping_messages = try std.ArrayList(PingMessage).initCapacity(self.allocator, init_message_size); + defer ping_messages.deinit(); + while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { @@ -479,12 +504,12 @@ pub const GossipService = struct { .err("error building prune messages"); continue; }; - defer prune_packets.deinit(); + // // TODO: fix this too + // defer prune_packets.deinit(); _ = push_log_entry.field("num_prune_msgs", prune_packets.items.len); - for (prune_packets.items) |packet| { - try self.packet_outgoing_channel.send(packet); - } + // TODO: pre-allocate this packet batch + try self.packet_outgoing_channel.send(prune_packets); } push_log_entry.info("received push message"); @@ -512,7 +537,7 @@ pub const GossipService = struct { continue; }; - // pull_log_entry.info("received pull response"); + pull_log_entry.info("received pull response"); }, .PullRequest => |*pull| { var pull_value: CrdsValue = pull[1]; // contact info @@ -558,65 +583,49 @@ pub const GossipService = struct { 
prune_log_entry.info("received prune message"); }, .PingMessage => |*ping| { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - self.logger.debugf("handle batch ping took {} with {} items\n", .{ elapsed, 1 }); - } - - var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); - defer endpoint_buf.deinit(); - - var ping_log_entry = self.logger - .field("from_endpoint", endpoint_buf.items) - .field("from_pubkey", &ping.from.string()); - - const packet = self.handlePingMessage(ping, from_endpoint) catch |err| { - ping_log_entry - .field("error", @errorName(err)) - .err("error handling ping message"); - continue; - }; + // TODO: filter out endpoints which are unspecificed / port = 0 - try self.packet_outgoing_channel.send(packet); + try ping_messages.append(PingMessage{ + .ping = ping, + .from_endpoint = &from_endpoint, + }); - ping_log_entry - .field("pongs sent", 1) - .info("received ping message"); + // var x_timer = std.time.Timer.start() catch unreachable; + // defer { + // const elapsed = x_timer.read(); + // self.logger.debugf("handle batch ping took {} with {} items\n", .{ elapsed, 1 }); + // } + + // var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); + // defer endpoint_buf.deinit(); + + // var ping_log_entry = self.logger + // .field("from_endpoint", endpoint_buf.items) + // .field("from_pubkey", &ping.from.string()); + + // const packet = self.handlePingMessage(ping, from_endpoint) catch |err| { + // ping_log_entry + // .field("error", @errorName(err)) + // .err("error handling ping message"); + // continue; + // }; + // try self.packet_outgoing_channel.send(packet); + + // ping_log_entry + // .field("pongs sent", 1) + // .info("received ping message"); }, .PongMessage => |*pong| { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - self.logger.debugf("handle batch pong took {} with {} items\n", .{ elapsed, 1 }); - } - 
- var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); - defer endpoint_buf.deinit(); - - { - const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); - - var ping_cache_lock = self.ping_cache_rw.write(); - defer ping_cache_lock.unlock(); - var ping_cache: *PingCache = ping_cache_lock.mut(); - - _ = ping_cache.receviedPong( - pong, - SocketAddr.fromEndpoint(&from_endpoint), - now, - ); - } - - self.logger - .field("from_endpoint", endpoint_buf.items) - .field("from_pubkey", &pong.from.string()) - .info("received pong message"); + try pong_messages.append(PongMessage{ + .pong = pong, + .from_endpoint = &from_endpoint, + }); }, } } // handle batch messages + // PULL REQ if (pull_requests.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_requests.items.len; @@ -627,9 +636,63 @@ pub const GossipService = struct { for (pull_requests.items) |*pr| { pr.filter.deinit(); } + pull_requests.clearRetainingCapacity(); + } + + // PING + const n_ping_messages = ping_messages.items.len; + if (n_ping_messages > 0) { + var x_timer = std.time.Timer.start() catch unreachable; + + // init a new batch of responses + var ping_packet_batch = try std.ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); + for (0..n_ping_messages) |_| { + ping_packet_batch.appendAssumeCapacity(Packet.default()); + } + + for (ping_messages.items, 0..) 
|*ping_message, i| { + const pong = try Pong.init(ping_message.ping, &self.my_keypair); + const pong_message = Protocol{ .PongMessage = pong }; + + var packet = &ping_packet_batch.items[i]; + const bytes_written = try bincode.writeToSlice( + &packet.data, + pong_message, + bincode.Params.standard, + ); + + packet.size = bytes_written.len; + packet.addr = ping_message.from_endpoint.*; + } + try self.packet_outgoing_channel.send(ping_packet_batch); + + self.logger.debugf("handle batch ping took {} with {} items\n", .{ x_timer.read(), n_ping_messages }); + ping_messages.clearRetainingCapacity(); + } + + // PONG + if (pong_messages.items.len > 0) { + var x_timer = std.time.Timer.start() catch unreachable; + const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); + const length = pong_messages.items.len; + + var ping_cache_lock = self.ping_cache_rw.write(); + defer ping_cache_lock.unlock(); + var ping_cache: *PingCache = ping_cache_lock.mut(); + + for (pong_messages.items) |*pong_message| { + _ = ping_cache.receviedPong( + pong_message.pong, + SocketAddr.fromEndpoint(pong_message.from_endpoint), + now, + ); + } + + self.logger.debugf("handle batch pong took {} with {} items\n", .{ x_timer.read(), length }); + pong_messages.clearRetainingCapacity(); } - pull_requests.clearRetainingCapacity(); + // TRIM crds-table { var x_timer = std.time.Timer.start() catch unreachable; defer { @@ -1012,9 +1075,11 @@ pub const GossipService = struct { const PullRequestTask = struct { allocator: std.mem.Allocator, + my_pubkey: *const Pubkey, + from_endpoint: *const EndPoint, filter: CrdsFilter, crds_table: *const CrdsTable, - output: std.ArrayList(CrdsValue), + output: std.ArrayList(Packet), task: Task, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), @@ -1039,11 +1104,20 @@ pub const GossipService = struct { }; defer response_crds_values.deinit(); - this.output.appendSlice(response_crds_values.items) catch { - // std.debug.print("append 
slice failed\n", .{}); + const maybe_packets = crdsValuesToPackets( + this.allocator, + this.my_pubkey, + response_crds_values.items, + this.from_endpoint, + ChunkType.PullResponse, + ) catch { return; }; - // std.debug.print("success: len = {}\n", .{ response_crds_values.items.len }); + + if (maybe_packets) |*packets| { + defer packets.deinit(); + this.output.appendSlice(packets.items) catch unreachable; + } } }; @@ -1074,7 +1148,9 @@ pub const GossipService = struct { defer ping_cache_lock.unlock(); var ping_cache: *PingCache = ping_cache_lock.mut(); - var ping_buff = [_]u8{0} ** PACKET_DATA_SIZE; + // TODO: only allocate this once + var ping_packets = try std.ArrayList(Packet).initCapacity(self.allocator, n_requests); + var count: usize = 0; for (pull_requests.items, 0..) |req, i| { // filter out valid peers and send ping messages to peers @@ -1090,16 +1166,26 @@ pub const GossipService = struct { // send a ping if (result.maybe_ping) |ping| { + ping_packets.appendAssumeCapacity(Packet.default()); + var packet = &ping_packets.items[count]; + var protocol_msg = Protocol{ .PingMessage = ping }; - var serialized_ping = bincode.writeToSlice(&ping_buff, protocol_msg, .{}) catch return error.SerializationError; - var packet = Packet.init(req.from_endpoint, ping_buff, serialized_ping.len); - try self.packet_outgoing_channel.send(packet); + var serialized_ping = bincode.writeToSlice(&packet.data, protocol_msg, .{}) catch return error.SerializationError; + packet.addr = req.from_endpoint; + packet.size = serialized_ping.len; + + count += 1; } if (result.passes_ping_check) { valid_indexs.appendAssumeCapacity(i); } } + + // send the pings + if (count > 0) { + try self.packet_outgoing_channel.send(ping_packets); + } } if (valid_indexs.items.len == 0) { @@ -1111,7 +1197,6 @@ pub const GossipService = struct { var tasks = try std.ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); defer { for (tasks.items) |task| { - task.deinit(); 
self.allocator.destroy(task); } tasks.deinit(); @@ -1123,10 +1208,13 @@ pub const GossipService = struct { defer crds_table_lock.unlock(); for (valid_indexs.items) |i| { + // TODO: pre-allocate these tasks // create the thread task - var output = std.ArrayList(CrdsValue).init(self.allocator); + var output = std.ArrayList(Packet).init(self.allocator); var task = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, + .my_pubkey = &self.my_pubkey, + .from_endpoint = &pull_requests.items[i].from_endpoint, .filter = pull_requests.items[i].filter, .crds_table = crds_table, .output = output, @@ -1151,20 +1239,9 @@ pub const GossipService = struct { } } - for (tasks.items, valid_indexs.items) |task, message_i| { - const from_endpoint = pull_requests.items[message_i].from_endpoint; - const maybe_packets = try crdsValuesToPackets( - self.allocator, - &self.my_pubkey, - task.output.items, - &from_endpoint, - ChunkType.PullResponse, - ); - if (maybe_packets) |packets| { - defer packets.deinit(); - for (packets.items) |packet| { - try self.packet_outgoing_channel.send(packet); - } + for (tasks.items) |task| { + if (task.output.items.len > 0) { + try self.packet_outgoing_channel.send(task.output); } } } @@ -1388,6 +1465,7 @@ pub const GossipService = struct { // update active set const from_pubkey = prune_data.pubkey; + // TODO: process in batches to remove this lock var active_set_lock = self.active_set_rw.write(); defer active_set_lock.unlock(); @@ -2509,7 +2587,7 @@ pub const BenchmarkGossipServiceGeneral = struct { } exit.store(true, std.atomic.Ordering.Unordered); - // send a few more to make sure the socket exits + // send a few more to make sure the socket exits for (0..5) |_| { var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); try outgoing_channel.send(msg); diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index 101c7143c..45f9e7bfa 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -139,6 
+139,43 @@ pub fn recvMmsg( return count; } +pub fn sendSocketV2( + socket: *UdpSocket, + outgoing_channel: *Channel(std.ArrayList(Packet)), + exit: *const std.atomic.Atomic(bool), + logger: Logger, +) error{ SocketSendError, OutOfMemory, ChannelClosed }!void { + var packets_sent: u64 = 0; + + while (!exit.load(std.atomic.Ordering.Unordered)) { + const maybe_packet_batches = try outgoing_channel.try_drain(); + if (maybe_packet_batches == null) { + // sleep for 1ms + // std.time.sleep(std.time.ns_per_ms * 1); + continue; + } + const packet_batches = maybe_packet_batches.?; + defer { + for (packet_batches) |*packet_batch| { + packet_batch.deinit(); + } + outgoing_channel.allocator.free(packet_batches); + } + + for (packet_batches) |*packet_batch| { + for (packet_batch.items) |*p| { + const bytes_sent = socket.sendTo(p.addr, p.data[0..p.size]) catch |e| { + logger.debugf("send_socket error: {s}\n", .{@errorName(e)}); + continue; + }; + packets_sent +|= 1; + std.debug.assert(bytes_sent == p.size); + } + } + } + logger.debugf("send_socket loop closed\n", .{}); +} + pub fn sendSocket( socket: *UdpSocket, outgoing_channel: *Channel(Packet), From 4b87a2584aca2a6bbde7820607f629a7d5f80e88 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 27 Sep 2023 14:57:08 -0400 Subject: [PATCH 16/72] batch push messages --- src/bincode/bincode.zig | 1 - src/gossip/gossip_service.zig | 237 +++++++++++++++++++++++++--------- 2 files changed, 179 insertions(+), 59 deletions(-) diff --git a/src/bincode/bincode.zig b/src/bincode/bincode.zig index 39bbbc442..65a11bc4d 100644 --- a/src/bincode/bincode.zig +++ b/src/bincode/bincode.zig @@ -588,7 +588,6 @@ pub fn readFromSlice(alloc: ?std.mem.Allocator, comptime T: type, slice: []const var d = deserializer(reader, params); const dd = d.deserializer(); const v = try getty.deserialize(alloc, T, dd); - errdefer getty.de.free(alloc, @TypeOf(dd), v); // ! 
return v; } diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index c7b28c3a2..cab272c75 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -37,6 +37,7 @@ const CrdsTable = _crds_table.CrdsTable; const CrdsError = _crds_table.CrdsError; const HashTimeQueue = _crds_table.HashTimeQueue; const CRDS_UNIQUE_PUBKEY_CAPACITY = _crds_table.CRDS_UNIQUE_PUBKEY_CAPACITY; +const AutoArrayHashSet = _crds_table.AutoArrayHashSet; const Logger = @import("../trace/log.zig").Logger; const DoNothingSink = @import("../trace/log.zig").DoNothingSink; @@ -352,6 +353,7 @@ pub const GossipService = struct { const msg = ProtocolMessage{ .from_endpoint = this.packet.addr, + // TODO: remove this copy (its on the heap - should just need a ptr) .message = protocol_message, }; this.verified_incoming_channel.send(msg) catch unreachable; @@ -433,6 +435,12 @@ pub const GossipService = struct { from_endpoint: *EndPoint, }; + pub const PushMessage = struct { + crds_values: []CrdsValue, + from_pubkey: *Pubkey, + from_endpoint: *EndPoint, + }; + /// main logic for recieving and processing `Protocol` messages. 
pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; @@ -441,6 +449,7 @@ pub const GossipService = struct { // // batching messages can lead to 1) less lock contention and 2) use of packetbatch which // // are pre-allocated packets for responses 3) processing messages in parallel + // batch so we can process in parallel var pull_requests = try std.ArrayList(PullRequestMessage).initCapacity(self.allocator, init_message_size); defer pull_requests.deinit(); @@ -453,6 +462,9 @@ pub const GossipService = struct { var ping_messages = try std.ArrayList(PingMessage).initCapacity(self.allocator, init_message_size); defer ping_messages.deinit(); + var push_messages = try std.ArrayList(PushMessage).initCapacity(self.allocator, init_message_size); + defer push_messages.deinit(); + while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { @@ -481,38 +493,44 @@ pub const GossipService = struct { self.logger.debugf("handle batch push took {} with {} items\n", .{ elapsed, 1 }); } - const push_from: Pubkey = push[0]; - const push_values: []CrdsValue = push[1]; + try push_messages.append(PushMessage{ + .crds_values = push[1], + .from_pubkey = &push[0], + .from_endpoint = &from_endpoint, + }); - var push_log_entry = self.logger - .field("num_crds_values", push_values.len) - .field("from_address", &push_from.string()); + // const push_from: Pubkey = push[0]; + // const push_values: []CrdsValue = push[1]; - var failed_insert_origins = self.handlePushMessage( - push_values, - ) catch |err| { - push_log_entry.field("error", @errorName(err)) - .err("error handling push message"); - continue; - }; - defer failed_insert_origins.deinit(); - _ = push_log_entry.field("num_failed_insert_origins", failed_insert_origins.count()); - - if (failed_insert_origins.count() != 0) { - var prune_packets = 
self.buildPruneMessage(&failed_insert_origins, push_from) catch |err| { - push_log_entry.field("error", @errorName(err)) - .err("error building prune messages"); - continue; - }; - // // TODO: fix this too - // defer prune_packets.deinit(); - - _ = push_log_entry.field("num_prune_msgs", prune_packets.items.len); - // TODO: pre-allocate this packet batch - try self.packet_outgoing_channel.send(prune_packets); - } + // var push_log_entry = self.logger + // .field("num_crds_values", push_values.len) + // .field("from_address", &push_from.string()); + + // var failed_insert_origins = self.handlePushMessage( + // push_values, + // ) catch |err| { + // push_log_entry.field("error", @errorName(err)) + // .err("error handling push message"); + // continue; + // }; + // defer failed_insert_origins.deinit(); + // _ = push_log_entry.field("num_failed_insert_origins", failed_insert_origins.count()); + + // if (failed_insert_origins.count() != 0) { + // var prune_packets = self.buildPruneMessage(&failed_insert_origins, push_from) catch |err| { + // push_log_entry.field("error", @errorName(err)) + // .err("error building prune messages"); + // continue; + // }; + // // // TODO: fix this too + // // defer prune_packets.deinit(); + + // _ = push_log_entry.field("num_prune_msgs", prune_packets.items.len); + // // TODO: pre-allocate this packet batch + // try self.packet_outgoing_channel.send(prune_packets); + // } - push_log_entry.info("received push message"); + // push_log_entry.info("received push message"); }, .PullResponse => |*pull| { var x_timer = std.time.Timer.start() catch unreachable; @@ -584,36 +602,10 @@ pub const GossipService = struct { }, .PingMessage => |*ping| { // TODO: filter out endpoints which are unspecificed / port = 0 - try ping_messages.append(PingMessage{ .ping = ping, .from_endpoint = &from_endpoint, }); - - // var x_timer = std.time.Timer.start() catch unreachable; - // defer { - // const elapsed = x_timer.read(); - // self.logger.debugf("handle batch 
ping took {} with {} items\n", .{ elapsed, 1 }); - // } - - // var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); - // defer endpoint_buf.deinit(); - - // var ping_log_entry = self.logger - // .field("from_endpoint", endpoint_buf.items) - // .field("from_pubkey", &ping.from.string()); - - // const packet = self.handlePingMessage(ping, from_endpoint) catch |err| { - // ping_log_entry - // .field("error", @errorName(err)) - // .err("error handling ping message"); - // continue; - // }; - // try self.packet_outgoing_channel.send(packet); - - // ping_log_entry - // .field("pongs sent", 1) - // .info("received ping message"); }, .PongMessage => |*pong| { try pong_messages.append(PongMessage{ @@ -625,6 +617,16 @@ pub const GossipService = struct { } // handle batch messages + // PUSH + if (push_messages.items.len > 0) { + var x_timer = std.time.Timer.start() catch unreachable; + const length = push_messages.items.len; + try self.handleBatchPushMessages(&push_messages, self.logger); + const elapsed = x_timer.read(); + self.logger.debugf("handle batch push took {} with {} items\n", .{ elapsed, length }); + push_messages.clearRetainingCapacity(); + } + // PULL REQ if (pull_requests.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; @@ -645,10 +647,11 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; // init a new batch of responses + // TODO: figure out a way to re-use this allocation instead of freeing after responder sends it var ping_packet_batch = try std.ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); - for (0..n_ping_messages) |_| { - ping_packet_batch.appendAssumeCapacity(Packet.default()); - } + ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); + + // TODO: add back logging for (ping_messages.items, 0..) 
|*ping_message, i| { const pong = try Pong.init(ping_message.ping, &self.my_keypair); @@ -1241,6 +1244,7 @@ pub const GossipService = struct { for (tasks.items) |task| { if (task.output.items.len > 0) { + // TODO: should only need one mux lock in this loop try self.packet_outgoing_channel.send(task.output); } } @@ -1534,6 +1538,123 @@ pub const GossipService = struct { return prune_packets; } + pub fn handleBatchPushMessages( + self: *Self, + batch_push_messages: *const std.ArrayList(PushMessage), + logger: Logger, + ) !void { + if (batch_push_messages.items.len == 0) { + return; + } + _ = logger; + + var pubkey_to_failed_origins = std.AutoArrayHashMap( + Pubkey, + AutoArrayHashSet(Pubkey), + ).init(self.allocator); + + var pubkey_to_endpoint = std.AutoArrayHashMap( + Pubkey, + EndPoint, + ).init(self.allocator); + + defer { + // TODO: figure out a way to re-use these allocs + pubkey_to_failed_origins.deinit(); + pubkey_to_endpoint.deinit(); + } + + // insert values and track the failed origins per pubkey + { + var crds_table_lock = self.crds_table_rw.write(); + defer crds_table_lock.unlock(); + + for (batch_push_messages.items) |*push_message| { + var crds_table: *CrdsTable = crds_table_lock.mut(); + var result = try crds_table.insertValues( + push_message.crds_values, + CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS, + false, + false, + ); + const failed_insert_indexs = result.failed.?; + defer failed_insert_indexs.deinit(); + + if (failed_insert_indexs.items.len == 0) { + // dont need to build prune messages + continue; + } + + // lookup contact info + const from_contact_info = crds_table.get(crds.CrdsValueLabel{ .LegacyContactInfo = push_message.from_pubkey.* }) orelse { + // unable to find contact info + continue; + }; + const from_gossip_addr = from_contact_info.value.data.LegacyContactInfo.gossip; + crds.sanitizeSocket(&from_gossip_addr) catch { + // invalid gossip socket + continue; + }; + + // track the endpoint + const from_gossip_endpoint = 
from_gossip_addr.toEndpoint(); + try pubkey_to_endpoint.put(push_message.from_pubkey.*, from_gossip_endpoint); + + // track failed origins + var failed_origins = blk: { + var lookup_result = try pubkey_to_failed_origins.getOrPut(push_message.from_pubkey.*); + if (!lookup_result.found_existing) { + lookup_result.value_ptr.* = AutoArrayHashSet(Pubkey).init(self.allocator); + } + break :blk lookup_result.value_ptr; + }; + for (failed_insert_indexs.items) |failed_index| { + const origin = push_message.crds_values[failed_index].id(); + try failed_origins.put(origin, {}); + } + } + } + + // build prune packets + // TODO: figure out a way to re-use this allocation + const now = getWallclockMs(); + var pubkey_to_failed_origins_iter = pubkey_to_failed_origins.iterator(); + + var n_packets = pubkey_to_failed_origins_iter.len; + if (n_packets == 0) return; + + var prune_packet_batch = try std.ArrayList(Packet).initCapacity(self.allocator, n_packets); + prune_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_packets); + var count: usize = 0; + + while (pubkey_to_failed_origins_iter.next()) |failed_origin_entry| { + const from_pubkey = failed_origin_entry.key_ptr.*; + const failed_origins_hashset = failed_origin_entry.value_ptr; + defer failed_origins_hashset.deinit(); + const from_endpoint = pubkey_to_endpoint.get(from_pubkey).?; + + const failed_origins: []Pubkey = failed_origins_hashset.keys(); + const prune_size = @min(failed_origins.len, MAX_PRUNE_DATA_NODES); + + var prune_data = PruneData.init( + self.my_pubkey, + failed_origins[0..prune_size], + from_pubkey, + now, + ); + prune_data.sign(&self.my_keypair) catch return error.SignatureError; + var protocol = Protocol{ .PruneMessage = .{ self.my_pubkey, prune_data } }; + + var packet = &prune_packet_batch.items[count]; + var written_slice = bincode.writeToSlice(&packet.data, protocol, bincode.Params{}) catch unreachable; + packet.size = written_slice.len; + packet.addr = from_endpoint; + count += 1; + } + + try 
self.packet_outgoing_channel.send(prune_packet_batch); + } + /// logic for handling push messages. crds values from the push message /// are inserted into the crds table. the origin pubkeys of values which /// fail the insertion are returned to generate prune messages. From 1444388207f0ca3b9a330e7f80f309a219038df3 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 27 Sep 2023 18:02:08 -0400 Subject: [PATCH 17/72] batch prune + pull resp --- src/benchmarks.zig | 10 +- src/gossip/gossip_service.zig | 226 +++++++++++++++++++++++----------- src/sync/channel.zig | 82 +++++++++--- 3 files changed, 227 insertions(+), 91 deletions(-) diff --git a/src/benchmarks.zig b/src/benchmarks.zig index f1edafb59..a6dd6781a 100644 --- a/src/benchmarks.zig +++ b/src/benchmarks.zig @@ -46,11 +46,11 @@ pub fn main() !void { TimeUnits.milliseconds, ); - try benchmark( - @import("gossip/gossip_service.zig").BenchmarkGossipServicePullRequest, - max_time_per_bench, - TimeUnits.milliseconds, - ); + // try benchmark( + // @import("gossip/gossip_service.zig").BenchmarkGossipServicePullRequest, + // max_time_per_bench, + // TimeUnits.milliseconds, + // ); } if (std.mem.startsWith(u8, "sync", filter)) { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index cab272c75..43c751add 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -441,6 +441,8 @@ pub const GossipService = struct { from_endpoint: *EndPoint, }; + pub const PullResponseMessage = struct { crds_values: []CrdsValue, from_pubkey: *Pubkey }; + /// main logic for recieving and processing `Protocol` messages. 
pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; @@ -465,6 +467,12 @@ pub const GossipService = struct { var push_messages = try std.ArrayList(PushMessage).initCapacity(self.allocator, init_message_size); defer push_messages.deinit(); + var pull_response_messages = try std.ArrayList(PullResponseMessage).initCapacity(self.allocator, init_message_size); + defer pull_response_messages.deinit(); + + var prune_messages = try std.ArrayList(*PruneData).initCapacity(self.allocator, init_message_size); + defer prune_messages.deinit(); + while (!self.exit.load(std.atomic.Ordering.Unordered)) { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { @@ -498,39 +506,6 @@ pub const GossipService = struct { .from_pubkey = &push[0], .from_endpoint = &from_endpoint, }); - - // const push_from: Pubkey = push[0]; - // const push_values: []CrdsValue = push[1]; - - // var push_log_entry = self.logger - // .field("num_crds_values", push_values.len) - // .field("from_address", &push_from.string()); - - // var failed_insert_origins = self.handlePushMessage( - // push_values, - // ) catch |err| { - // push_log_entry.field("error", @errorName(err)) - // .err("error handling push message"); - // continue; - // }; - // defer failed_insert_origins.deinit(); - // _ = push_log_entry.field("num_failed_insert_origins", failed_insert_origins.count()); - - // if (failed_insert_origins.count() != 0) { - // var prune_packets = self.buildPruneMessage(&failed_insert_origins, push_from) catch |err| { - // push_log_entry.field("error", @errorName(err)) - // .err("error building prune messages"); - // continue; - // }; - // // // TODO: fix this too - // // defer prune_packets.deinit(); - - // _ = push_log_entry.field("num_prune_msgs", prune_packets.items.len); - // // TODO: pre-allocate this packet batch - // try self.packet_outgoing_channel.send(prune_packets); - // } - - // 
push_log_entry.info("received push message"); }, .PullResponse => |*pull| { var x_timer = std.time.Timer.start() catch unreachable; @@ -539,25 +514,13 @@ pub const GossipService = struct { self.logger.debugf("handle batch pull_resp took {} with {} items\n", .{ elapsed, 1 }); } - const from: Pubkey = pull[0]; - const crds_values: []CrdsValue = pull[1]; - - var pull_log_entry = self.logger - .field("num_crds_values", crds_values.len) - .field("from_address", &from.string()); - - self.handlePullResponse( - crds_values, - pull_log_entry, - ) catch |err| { - pull_log_entry.field("error", @errorName(err)) - .err("error handling pull response"); - continue; - }; - - pull_log_entry.info("received pull response"); + try pull_response_messages.append(PullResponseMessage{ + .from_pubkey = &pull[0], + .crds_values = pull[1], + }); }, .PullRequest => |*pull| { + // TODO: parallelize this var pull_value: CrdsValue = pull[1]; // contact info switch (pull_value.data) { .LegacyContactInfo => |*info| { @@ -577,28 +540,20 @@ pub const GossipService = struct { }); }, .PruneMessage => |*prune| { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - self.logger.debugf("handle batch prune took {} with {} items\n", .{ elapsed, 1 }); + var prune_data = &prune[1]; + const now = getWallclockMs(); + const prune_wallclock = prune_data.wallclock; + const too_old = prune_wallclock < now -| CRDS_GOSSIP_PRUNE_MSG_TIMEOUT_MS; + if (too_old) { + // return error.PruneMessageTooOld; + continue; } - const prune_msg: PruneData = prune[1]; - - var endpoint_buf = try endpointToString(self.allocator, &from_endpoint); - defer endpoint_buf.deinit(); - - var prune_log_entry = self.logger - .field("from_endpoint", endpoint_buf.items) - .field("from_pubkey", &prune_msg.pubkey.string()) - .field("num_prunes", prune_msg.prunes.len); - - self.handlePruneMessage(&prune_msg) catch |err| { - prune_log_entry.field("error", @errorName(err)) - .err("error handling prune 
message"); + const bad_destination = !prune_data.destination.equals(&self.my_pubkey); + if (bad_destination) { + // return error.BadDestination; continue; - }; - - prune_log_entry.info("received prune message"); + } + try prune_messages.append(&prune[1]); }, .PingMessage => |*ping| { // TODO: filter out endpoints which are unspecificed / port = 0 @@ -627,6 +582,16 @@ pub const GossipService = struct { push_messages.clearRetainingCapacity(); } + // PRUNE + if (prune_messages.items.len > 0) { + var x_timer = std.time.Timer.start() catch unreachable; + const length = prune_messages.items.len; + try self.handleBatchPruneMessages(&prune_messages); + const elapsed = x_timer.read(); + self.logger.debugf("handle batch prune took {} with {} items\n", .{ elapsed, length }); + prune_messages.clearRetainingCapacity(); + } + // PULL REQ if (pull_requests.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; @@ -641,6 +606,16 @@ pub const GossipService = struct { pull_requests.clearRetainingCapacity(); } + // PULL RESP + if (pull_response_messages.items.len > 0) { + var x_timer = std.time.Timer.start() catch unreachable; + const length = pull_response_messages.items.len; + try self.handleBatchPullResponses(&pull_response_messages, self.logger); + const elapsed = x_timer.read(); + self.logger.debugf("handle batch pull_resp took {} with {} items\n", .{ elapsed, length }); + pull_response_messages.clearRetainingCapacity(); + } + // PING const n_ping_messages = ping_messages.items.len; if (n_ping_messages > 0) { @@ -713,7 +688,8 @@ pub const GossipService = struct { } const elapsed = timer.read(); - self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + // self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); self.messages_processed.store(msg_count, std.atomic.Ordering.Release); } @@ -1083,6 +1059,7 @@ pub const GossipService = struct 
{ filter: CrdsFilter, crds_table: *const CrdsTable, output: std.ArrayList(Packet), + output_limit: *std.atomic.Atomic(i64), task: Task, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), @@ -1095,18 +1072,28 @@ pub const GossipService = struct { var this = @fieldParentPtr(@This(), "task", task); defer this.done.store(true, std.atomic.Ordering.Release); + const output_limit = this.output_limit.load(std.atomic.Ordering.Unordered); + if (output_limit <= 0) { + return; + } + const response_crds_values = pull_response.filterCrdsValues( this.allocator, this.crds_table, &this.filter, crds.getWallclockMs(), - MAX_NUM_CRDS_VALUES_PULL_RESPONSE, + @as(usize, @max(output_limit, 0)), ) catch { // std.debug.print("filterCrdsValues failed\n", .{}); return; }; defer response_crds_values.deinit(); + _ = this.output_limit.fetchSub( + @as(i64, @intCast(response_crds_values.items.len)), + std.atomic.Ordering.Release, + ); + const maybe_packets = crdsValuesToPackets( this.allocator, this.my_pubkey, @@ -1210,6 +1197,8 @@ pub const GossipService = struct { const crds_table: *const CrdsTable = crds_table_lock.get(); defer crds_table_lock.unlock(); + var output_limit = std.atomic.Atomic(i64).init(MAX_NUM_CRDS_VALUES_PULL_RESPONSE); + for (valid_indexs.items) |i| { // TODO: pre-allocate these tasks // create the thread task @@ -1222,6 +1211,7 @@ pub const GossipService = struct { .crds_table = crds_table, .output = output, .allocator = self.allocator, + .output_limit = &output_limit, }; // alloc on heap @@ -1369,6 +1359,78 @@ pub const GossipService = struct { return packets; } + pub fn handleBatchPullResponses( + self: *Self, + pull_response_messages: *const std.ArrayList(PullResponseMessage), + logger: Logger, + ) !void { + if (pull_response_messages.items.len == 0) { + return; + } + _ = logger; + + const now = getWallclockMs(); + var failed_insert_ptrs = std.ArrayList(*CrdsValue).init(self.allocator); + { + var crds_table_lock = self.crds_table_rw.write(); + var 
crds_table: *CrdsTable = crds_table_lock.mut(); + defer crds_table_lock.unlock(); + + for (pull_response_messages.items) |*pull_message| { + const crds_values = pull_message.crds_values; + + const insert_results = try crds_table.insertValues( + crds_values, + CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, + true, + true, + ); + + // silently insert the timeout values + // (without updating all associated origin values) + const timeout_indexs = insert_results.timeouts.?; + defer timeout_indexs.deinit(); + for (timeout_indexs.items) |index| { + crds_table.insert( + crds_values[index], + now, + ) catch {}; + } + + // update the contactInfo timestamps of the successful inserts + // (and all other origin values) + const successful_insert_indexs = insert_results.inserted.?; + defer successful_insert_indexs.deinit(); + for (successful_insert_indexs.items) |index| { + const origin = crds_values[index].id(); + crds_table.updateRecordTimestamp(origin, now); + } + crds_table.updateRecordTimestamp(pull_message.from_pubkey.*, now); + + var failed_insert_indexs = insert_results.failed.?; + defer failed_insert_indexs.deinit(); + for (failed_insert_indexs.items) |index| { + try failed_insert_ptrs.append(&crds_values[index]); + } + } + } + + { + var failed_pull_hashes_lock = self.failed_pull_hashes_mux.lock(); + var failed_pull_hashes: *HashTimeQueue = failed_pull_hashes_lock.mut(); + defer failed_pull_hashes_lock.unlock(); + + var buf: [PACKET_DATA_SIZE]u8 = undefined; + for (failed_insert_ptrs.items) |crds_value_ptr| { + var bytes = bincode.writeToSlice(&buf, crds_value_ptr.*, bincode.Params.standard) catch { + continue; + }; + const value_hash = Hash.generateSha256Hash(bytes); + try failed_pull_hashes.insert(value_hash, now); + } + } + } + /// logic for handling a pull response message. /// successful inserted values, have their origin value timestamps updated. 
/// failed inserts (ie, too old or duplicate values) are added to the failed pull hashes so that they can be @@ -1446,6 +1508,26 @@ pub const GossipService = struct { } } + pub fn handleBatchPruneMessages( + self: *Self, + prune_messages: *const std.ArrayList(*PruneData), + ) !void { + var active_set_lock = self.active_set_rw.write(); + defer active_set_lock.unlock(); + var active_set: *ActiveSet = active_set_lock.mut(); + + for (prune_messages.items) |prune_data| { + // update active set + const from_pubkey = prune_data.pubkey; + for (prune_data.prunes) |origin| { + if (origin.equals(&self.my_pubkey)) { + continue; + } + active_set.prune(from_pubkey, origin); + } + } + } + /// logic for handling a prune message. verifies the prune message /// is not too old, and that the destination pubkey is the local node, /// then updates the active set to prune the list of origin Pubkeys. diff --git a/src/sync/channel.zig b/src/sync/channel.zig index 8605cd6bd..b2d8e29cb 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -121,9 +121,11 @@ pub fn Channel(comptime T: type) type { const Block = struct { num: u32 = 333, valid: bool = true, + data: [1024]u8 = undefined, }; const BlockChannel = Channel(Block); +const BlockPointerChannel = Channel(*Block); const logger = std.log.scoped(.sync_channel_tests); @@ -143,6 +145,56 @@ fn testSender(chan: *BlockChannel, total_send: usize) void { chan.close(); } +fn testPointerSender(chan: *BlockPointerChannel, total_send: usize) void { + var allocator = chan.allocator; + var i: usize = 0; + while (i < total_send) : (i += 1) { + var block_ptr = allocator.create(Block) catch unreachable; + block_ptr.* = Block{ .num = @intCast(i) }; + chan.send(block_ptr) catch unreachable; + } + chan.close(); +} + +fn testPointerReceiver(chan: *BlockPointerChannel, recv_count: *Atomic(usize), id: u8) void { + var allocator = chan.allocator; + _ = id; + while (chan.receive()) |v| { + _ = recv_count.fetchAdd(1, .SeqCst); + allocator.destroy(v); + } 
+} + +// pub fn Tunnel(T: type) type { +// return struct { +// incoming_channel: Channel(T), +// outgoing_channel: Channel(T), + +// pub fn init(allocator: std.mem.Allocator) Tunnel(T) { +// return Tunnel(T){ +// .incoming_channel = Channel(T).init(allocator, 100), +// .outgoing_channel = Channel(T).init(allocator, 100), +// }; +// } + +// pub fn run(self: *Tunnel(T)) void { +// while (true) { +// const maybe_packets = try self.incoming_channel.try_drain(); +// if (maybe_packets == null) { +// continue; +// } +// const packets = maybe_packets.?; +// defer self.packet_incoming_channel.allocator.free(packets); + +// for (packets) |*p| { +// _ = p; +// std.time.sleep(100); +// } +// } +// } +// }; +// } + test "sync.channel: channel works properly" { var ch = BlockChannel.init(testing.allocator, 100); defer ch.deinit(); @@ -150,15 +202,11 @@ test "sync.channel: channel works properly" { var recv_count: Atomic(usize) = Atomic(usize).init(0); var send_count: usize = 100_000; - var join1 = try std.Thread.spawn(.{}, testReceiver, .{ ch, &recv_count, 1 }); var join2 = try std.Thread.spawn(.{}, testSender, .{ ch, send_count }); - var join3 = try std.Thread.spawn(.{}, testReceiver, .{ ch, &recv_count, 2 }); - var join4 = try std.Thread.spawn(.{}, testReceiver, .{ ch, &recv_count, 3 }); + var join1 = try std.Thread.spawn(.{}, testReceiver, .{ ch, &recv_count, 1 }); join1.join(); join2.join(); - join3.join(); - join4.join(); try testing.expectEqual(send_count, recv_count.value); } @@ -166,25 +214,31 @@ test "sync.channel: channel works properly" { pub const BenchmarkChannel = struct { pub const min_iterations = 10; pub const max_iterations = 20; + const send_count: usize = 100_000; pub fn benchmarkChannel() !void { - const T: type = Block; - const allocator = std.heap.page_allocator; - var channel = Channel(T).init(allocator, 100); + var channel = BlockChannel.init(allocator, send_count / 2); defer channel.deinit(); var recv_count: Atomic(usize) = Atomic(usize).init(0); - var 
send_count: usize = 100_000; - var join1 = try std.Thread.spawn(.{}, testReceiver, .{ channel, &recv_count, 1 }); var join2 = try std.Thread.spawn(.{}, testSender, .{ channel, send_count }); - var join3 = try std.Thread.spawn(.{}, testReceiver, .{ channel, &recv_count, 2 }); - var join4 = try std.Thread.spawn(.{}, testReceiver, .{ channel, &recv_count, 3 }); + var join1 = try std.Thread.spawn(.{}, testReceiver, .{ channel, &recv_count, 1 }); + join1.join(); + join2.join(); + } + + pub fn benchmarkPointerChannel() !void { + const allocator = std.heap.page_allocator; + var channel = BlockPointerChannel.init(allocator, send_count / 2); + defer channel.deinit(); + + var recv_count: Atomic(usize) = Atomic(usize).init(0); + var join2 = try std.Thread.spawn(.{}, testPointerSender, .{ channel, send_count }); + var join1 = try std.Thread.spawn(.{}, testPointerReceiver, .{ channel, &recv_count, 1 }); join1.join(); join2.join(); - join3.join(); - join4.join(); } }; From b3a292641999651bbd15aeb4b494965a9266cf74 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 29 Sep 2023 13:00:52 -0400 Subject: [PATCH 18/72] up --- src/gossip/crds_table.zig | 2 +- src/gossip/gossip_service.zig | 88 ++++++--- src/sync/channel.zig | 331 ++++++++++++++++++++++++++++++---- 3 files changed, 356 insertions(+), 65 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 298aaee0d..6e0fab6b0 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -537,7 +537,7 @@ pub const CrdsTable = struct { const drop_size = n_pubkeys -| max_pubkey_capacity; // TODO: drop based on stake weight const drop_pubkeys = self.pubkey_to_values.keys()[0..drop_size]; - const labels = self.store.iterator().keys; + const labels = self.store.keys(); // allocate here so SwapRemove doesnt mess with us var labels_to_remove = std.ArrayList(CrdsValueLabel).init(self.allocator); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 
43c751add..1adbf91c1 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -55,9 +55,14 @@ const Hash = @import("../core/hash.zig").Hash; const socket_utils = @import("socket_utils.zig"); const Channel = @import("../sync/channel.zig").Channel; +const RingBuffer = @import("../sync/channel.zig").RingBuffer; +const RingBufferV2 = @import("../sync/channel.zig").RingBufferV2; + const PacketChannel = Channel(Packet); const PacketBatchChannel = Channel(std.ArrayList(Packet)); +const ProtocolRingBuffer = RingBufferV2(ProtocolMessage); + const ProtocolMessage = struct { from_endpoint: EndPoint, message: Protocol }; const ProtocolChannel = Channel(ProtocolMessage); const PingCache = @import("./ping_pong.zig").PingCache; @@ -135,6 +140,7 @@ pub const GossipService = struct { var packet_incoming_channel = PacketBatchChannel.init(allocator, 10000); var packet_outgoing_channel = PacketBatchChannel.init(allocator, 10000); var verified_incoming_channel = ProtocolChannel.init(allocator, 10000); + // var verified_incoming_channel = ProtocolRingBuffer.init(allocator, 100_000); errdefer { packet_incoming_channel.deinit(); @@ -324,6 +330,7 @@ pub const GossipService = struct { packet: *const Packet, allocator: std.mem.Allocator, verified_incoming_channel: *Channel(ProtocolMessage), + // verified_incoming_channel: *ProtocolRingBuffer, task: Task, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), @@ -357,6 +364,12 @@ pub const GossipService = struct { .message = protocol_message, }; this.verified_incoming_channel.send(msg) catch unreachable; + + // TODO: fix + // _ = this.verified_incoming_channel.push(msg); + // while (!) 
{ + // std.time.sleep(100); + // } } }; @@ -379,7 +392,11 @@ pub const GossipService = struct { } while (!self.exit.load(std.atomic.Ordering.Unordered)) { + // var drain_timer = std.time.Timer.start() catch unreachable; const maybe_packets = try self.packet_incoming_channel.try_drain(); + // const drain_elapsed = drain_timer.read(); + // self.logger.debugf("handle batch packet_drain took {} with {} items\n", .{ drain_elapsed, 1 }); + if (maybe_packets == null) { // // sleep for 1ms // std.time.sleep(std.time.ns_per_ms * 1); @@ -474,23 +491,33 @@ pub const GossipService = struct { defer prune_messages.deinit(); while (!self.exit.load(std.atomic.Ordering.Unordered)) { + // var drain_timer = std.time.Timer.start() catch unreachable; const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); + // const drain_elapsed = drain_timer.read(); + // self.logger.debugf("handle batch msg_drain took {} with {} items\n", .{ drain_elapsed, 1 }); + if (maybe_protocol_messages == null) { // // sleep for 1ms // std.time.sleep(std.time.ns_per_ms * 1); continue; } + if (msg_count == 0) { timer.reset(); } const protocol_messages = maybe_protocol_messages.?; defer self.verified_incoming_channel.allocator.free(protocol_messages); + // defer { + // // self.verified_incoming_channel.consumeAmount(protocol_messages.items.len); + // protocol_messages.deinit(); + // } msg_count += protocol_messages.len; // TODO: filter messages based on_shred_version for (protocol_messages) |*protocol_message| { + var from_endpoint: EndPoint = protocol_message.from_endpoint; switch (protocol_message.message) { @@ -498,7 +525,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - self.logger.debugf("handle batch push took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch push took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); } try push_messages.append(PushMessage{ @@ -511,7 
+538,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - self.logger.debugf("handle batch pull_resp took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch pull_resp took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); } try pull_response_messages.append(PullResponseMessage{ @@ -520,18 +547,6 @@ pub const GossipService = struct { }); }, .PullRequest => |*pull| { - // TODO: parallelize this - var pull_value: CrdsValue = pull[1]; // contact info - switch (pull_value.data) { - .LegacyContactInfo => |*info| { - if (info.id.equals(&self.my_pubkey)) { - // talking to myself == ignore - continue; - } - }, - // only contact info supported - else => continue, - } try pull_requests.append(.{ .filter = pull[0], @@ -578,7 +593,7 @@ pub const GossipService = struct { const length = push_messages.items.len; try self.handleBatchPushMessages(&push_messages, self.logger); const elapsed = x_timer.read(); - self.logger.debugf("handle batch push took {} with {} items\n", .{ elapsed, length }); + self.logger.debugf("handle batch push took {} with {} items @{}\n", .{ elapsed, length, msg_count }); push_messages.clearRetainingCapacity(); } @@ -588,7 +603,7 @@ pub const GossipService = struct { const length = prune_messages.items.len; try self.handleBatchPruneMessages(&prune_messages); const elapsed = x_timer.read(); - self.logger.debugf("handle batch prune took {} with {} items\n", .{ elapsed, length }); + self.logger.debugf("handle batch prune took {} with {} items @{}\n", .{ elapsed, length, msg_count }); prune_messages.clearRetainingCapacity(); } @@ -598,7 +613,7 @@ pub const GossipService = struct { const length = pull_requests.items.len; self.handleBatchPullRequest(pull_requests); const elapsed = x_timer.read(); - self.logger.debugf("handle batch pull_req took {} with {} items\n", .{ elapsed, length }); + self.logger.debugf("handle batch pull_req took {} with {} items 
@{}\n", .{ elapsed, length, msg_count }); for (pull_requests.items) |*pr| { pr.filter.deinit(); @@ -612,7 +627,7 @@ pub const GossipService = struct { const length = pull_response_messages.items.len; try self.handleBatchPullResponses(&pull_response_messages, self.logger); const elapsed = x_timer.read(); - self.logger.debugf("handle batch pull_resp took {} with {} items\n", .{ elapsed, length }); + self.logger.debugf("handle batch pull_resp took {} with {} items @{}\n", .{ elapsed, length, msg_count }); pull_response_messages.clearRetainingCapacity(); } @@ -644,7 +659,7 @@ pub const GossipService = struct { } try self.packet_outgoing_channel.send(ping_packet_batch); - self.logger.debugf("handle batch ping took {} with {} items\n", .{ x_timer.read(), n_ping_messages }); + self.logger.debugf("handle batch ping took {} with {} items @{}\n", .{ x_timer.read(), n_ping_messages, msg_count }); ping_messages.clearRetainingCapacity(); } @@ -666,7 +681,7 @@ pub const GossipService = struct { ); } - self.logger.debugf("handle batch pong took {} with {} items\n", .{ x_timer.read(), length }); + self.logger.debugf("handle batch pong took {} with {} items @{}\n", .{ x_timer.read(), length, msg_count }); pong_messages.clearRetainingCapacity(); } @@ -675,7 +690,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - self.logger.debugf("handle batch crds_trim took {} with {} items\n", .{ elapsed, 1 }); + self.logger.debugf("handle batch crds_trim took {} with {} items @{}\n", .{ elapsed, 1, msg_count}); } var crds_table_lock = self.crds_table_rw.write(); @@ -688,9 +703,15 @@ pub const GossipService = struct { } const elapsed = timer.read(); - // self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); - std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); + // 
std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); self.messages_processed.store(msg_count, std.atomic.Ordering.Release); + if (msg_count >= 30_000) { + // if (msg_count >= 1_000) { + std.debug.print("exiting...\n", .{}); + self.exit.store(true, std.atomic.Ordering.Unordered); + break; + } } self.logger.debugf("process_messages loop closed\n", .{}); @@ -1056,7 +1077,8 @@ pub const GossipService = struct { allocator: std.mem.Allocator, my_pubkey: *const Pubkey, from_endpoint: *const EndPoint, - filter: CrdsFilter, + filter: *CrdsFilter, + value: *CrdsValue, crds_table: *const CrdsTable, output: std.ArrayList(Packet), output_limit: *std.atomic.Atomic(i64), @@ -1072,6 +1094,17 @@ pub const GossipService = struct { var this = @fieldParentPtr(@This(), "task", task); defer this.done.store(true, std.atomic.Ordering.Release); + switch (this.value.data) { + .LegacyContactInfo => |*info| { + if (info.id.equals(this.my_pubkey)) { + // talking to myself == ignore + return; + } + }, + // only contact info supported + else => return, + } + const output_limit = this.output_limit.load(std.atomic.Ordering.Unordered); if (output_limit <= 0) { return; @@ -1080,7 +1113,7 @@ pub const GossipService = struct { const response_crds_values = pull_response.filterCrdsValues( this.allocator, this.crds_table, - &this.filter, + this.filter, crds.getWallclockMs(), @as(usize, @max(output_limit, 0)), ) catch { @@ -1115,6 +1148,8 @@ pub const GossipService = struct { self: *Self, pull_requests: std.ArrayList(PullRequestMessage), ) !void { + // TODO: parallelize this + // update the callers const now = getWallclockMs(); { @@ -1207,7 +1242,8 @@ pub const GossipService = struct { .task = .{ .callback = PullRequestTask.callback }, .my_pubkey = &self.my_pubkey, .from_endpoint = &pull_requests.items[i].from_endpoint, - .filter = pull_requests.items[i].filter, + .filter = &pull_requests.items[i].filter, + .value = &pull_requests.items[i].value, .crds_table = crds_table, .output = 
output, .allocator = self.allocator, diff --git a/src/sync/channel.zig b/src/sync/channel.zig index b2d8e29cb..7f17c552a 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -5,6 +5,174 @@ const Condition = std.Thread.Condition; const testing = std.testing; const assert = std.debug.assert; const Mux = @import("mux.zig").Mux; +const Ordering = std.atomic.Ordering; + +pub fn RingBufferV2(comptime T: type) type { + return struct { + buffer: []T, + index: usize, + count: Atomic(usize), + allocator: std.mem.Allocator, // just used for deinit + + const Self = @This(); + + pub fn init(allocator: std.mem.Allocator, capacity: usize) *Self { + std.debug.assert(capacity > 0); + var self = allocator.create(Self) catch unreachable; + const buffer = allocator.alloc(T, capacity) catch unreachable; + self.* = RingBufferV2(T){ + .buffer = buffer, + .index = 0, + .count = Atomic(usize).init(0), + .allocator = allocator, + }; + return self; + } + + pub fn deinit(self: *Self) void { + self.allocator.free(self.buffer); + } + + pub inline fn isFull(self: *const Self) bool { + return self.count.load(Ordering.Acquire) == self.buffer.len; + } + + pub inline fn isEmpty(self: *const Self) bool { + return self.count.load(Ordering.Acquire) == 0; + } + + // get next free pointer => fill it up => increment count + pub inline fn getNextFreePtr(self: *Self) ?*T { + if (self.isFull()) return null; + const count = self.count.load(Ordering.Acquire); + return &self.buffer[(self.index + count) % self.buffer.len]; + } + + pub inline fn incrementCount(self: *Self) void { + const count = self.count.fetchAdd(1, Ordering.Release); + _ = count; + } + + // get head pointer => process it => increment head + pub inline fn getHeadPtr(self: *Self) ?*T { + if (self.isEmpty()) return null; + return &self.buffer[self.index % self.buffer.len]; + } + + // higher level functions + pub inline fn push(self: *Self, value: T) bool { + if (self.getNextFreePtr()) |ptr| { + ptr.* = value; + _ = 
self.count.fetchAdd(1, Ordering.Release); + return true; + } else { + return false; + } + } + + pub inline fn try_drain(self: *Self) !?std.ArrayList(T) { + if (self.isEmpty()) return null; + + const count = self.count.load(Ordering.Acquire); + var items = try std.ArrayList(T).initCapacity(self.allocator, count); + for (0..count) |i| { + var ptr = &self.buffer[(self.index + i) % self.buffer.len]; + items.appendAssumeCapacity(ptr.*); + } + + _ = self.count.fetchSub(count, Ordering.Release); + self.index += count; + + return items; + } + }; +} + +pub fn RingBuffer(comptime T: type) type { + return struct { + buffer: []T, + index: usize, + count: Atomic(usize), + allocator: std.mem.Allocator, // just used for deinit + + const Self = @This(); + + pub fn init(allocator: std.mem.Allocator, capacity: usize) *Self { + std.debug.assert(capacity > 0); + var self = allocator.create(Self) catch unreachable; + const buffer = allocator.alloc(T, capacity) catch unreachable; + self.* = RingBuffer(T){ + .buffer = buffer, + .index = 0, + .count = Atomic(usize).init(0), + .allocator = allocator, + }; + return self; + } + + pub fn deinit(self: *Self) void { + self.allocator.free(self.buffer); + } + + pub fn isFull(self: *const Self) bool { + return self.count.load(Ordering.Acquire) == self.buffer.len; + } + + pub fn isEmpty(self: *const Self) bool { + return self.count.load(Ordering.Acquire) == 0; + } + + // get next free pointer => fill it up => increment count + pub inline fn getNextFreePtr(self: *Self) ?*T { + if (self.isFull()) return null; + const count = self.count.load(Ordering.Acquire); + return &self.buffer[(self.index + count) % self.buffer.len]; + } + + pub inline fn incrementCount(self: *Self) void { + const count = self.count.fetchAdd(1, Ordering.Release); + _ = count; + // std.debug.print("count incremented from {} -> {}\n", .{count, count+1}); + } + + // get head pointer => process it => increment head + pub inline fn getHeadPtr(self: *Self) ?*T { + if (self.isEmpty()) 
return null; + return &self.buffer[self.index % self.buffer.len]; + } + + pub inline fn consumeAmount(self: *Self, amount: usize) void { + const count = self.count.fetchSub(amount, Ordering.Release); + // std.debug.print("consuming {}: count: {}->{} new index: {}->{}\n", .{amount, count, count - amount, self.index, self.index+amount}); + _ = count; + self.index += amount; + } + + // higher level functions + pub fn push(self: *Self, value: T) bool { + if (self.getNextFreePtr()) |ptr| { + ptr.* = value; + self.incrementCount(); + return true; + } else { + return false; + } + } + + pub fn try_drain(self: *const Self) !?std.ArrayList(*T) { + if (self.isEmpty()) return null; + + const count = self.count.load(Ordering.Acquire); + // std.debug.print("reading: {} -> {} (count = {})\n", .{self.index, self.index + count, count}); + var items = try std.ArrayList(*T).initCapacity(self.allocator, count); + for (0..count) |i| { + var ptr = &self.buffer[(self.index + i) % self.buffer.len]; + items.appendAssumeCapacity(ptr); + } + return items; + } + }; +} /// A very basic mpmc channel implementation - TODO: replace with a legit channel impl pub fn Channel(comptime T: type) type { @@ -145,6 +313,88 @@ fn testSender(chan: *BlockChannel, total_send: usize) void { chan.close(); } +const Packet = @import("../gossip/packet.zig").Packet; +fn testPacketSender(chan: *Channel(Packet), total_send: usize) void { + var i: usize = 0; + while (i < total_send) : (i += 1) { + var packet = Packet.default(); + chan.send(packet) catch unreachable; + } +} + +fn testPacketReceiver(chan: *Channel(Packet), total_recv: usize) void { + var count: usize = 0; + while (count < total_recv) : (count += 1) { + const v = chan.receive(); + _ = v; + } +} + +fn testPacketSenderBuffer(ring_buffer: *RingBuffer(Packet), total_send: usize) void { + var i: usize = 0; + while (i < total_send) { + var packet = Packet.default(); + packet.data[2] = @as(u8, @truncate(i)); + if (ring_buffer.push(packet)) { + i += 1; + } + } 
+} + +fn testPacketRecvBuffer(ring_buffer: *RingBuffer(Packet), total_recv: usize) void { + var count: usize = 0; + while (count < total_recv) { + if (ring_buffer.getHeadPtr()) |head| { + defer ring_buffer.consumeAmount(1); + _ = head; + count += 1; + // std.debug.print("recv count: {}/{} \n", .{count, total_recv}); + } + } +} + +fn testPacketRecvBufferDrain(ring_buffer: *RingBuffer(Packet), total_recv: usize) void { + var count: usize = 0; + while (count < total_recv) { + if (ring_buffer.try_drain() catch unreachable) |ptrs| { + for (ptrs.items) |ptr| { + // std.debug.print("{any}", .{ptr.*.data[2]}); + _ = ptr; + count += 1; + } + defer { + ptrs.deinit(); + ring_buffer.consumeAmount(ptrs.items.len); + } + } + } +} + +fn testPacketSenderBufferV2(ring_buffer: *RingBufferV2(Packet), total_send: usize) void { + var i: usize = 0; + while (i < total_send) { + var packet = Packet.default(); + packet.data[2] = @as(u8, @truncate(i)); + if (ring_buffer.push(packet)) { + i += 1; + } + } +} + +fn testPacketRecvBufferDrainV2(ring_buffer: *RingBufferV2(Packet), total_recv: usize) void { + var count: usize = 0; + while (count < total_recv) { + if (ring_buffer.try_drain() catch unreachable) |v| { + for (v.items) |val| { + // std.debug.print("{any}", .{val.data[2]}); + _ = val; + count += 1; + } + v.deinit(); + } + } +} + fn testPointerSender(chan: *BlockPointerChannel, total_send: usize) void { var allocator = chan.allocator; var i: usize = 0; @@ -165,36 +415,6 @@ fn testPointerReceiver(chan: *BlockPointerChannel, recv_count: *Atomic(usize), i } } -// pub fn Tunnel(T: type) type { -// return struct { -// incoming_channel: Channel(T), -// outgoing_channel: Channel(T), - -// pub fn init(allocator: std.mem.Allocator) Tunnel(T) { -// return Tunnel(T){ -// .incoming_channel = Channel(T).init(allocator, 100), -// .outgoing_channel = Channel(T).init(allocator, 100), -// }; -// } - -// pub fn run(self: *Tunnel(T)) void { -// while (true) { -// const maybe_packets = try 
self.incoming_channel.try_drain(); -// if (maybe_packets == null) { -// continue; -// } -// const packets = maybe_packets.?; -// defer self.packet_incoming_channel.allocator.free(packets); - -// for (packets) |*p| { -// _ = p; -// std.time.sleep(100); -// } -// } -// } -// }; -// } - test "sync.channel: channel works properly" { var ch = BlockChannel.init(testing.allocator, 100); defer ch.deinit(); @@ -212,9 +432,9 @@ test "sync.channel: channel works properly" { } pub const BenchmarkChannel = struct { - pub const min_iterations = 10; - pub const max_iterations = 20; - const send_count: usize = 100_000; + pub const min_iterations = 5; + pub const max_iterations = 5; + const send_count: usize = 500_000; pub fn benchmarkChannel() !void { const allocator = std.heap.page_allocator; @@ -229,14 +449,49 @@ pub const BenchmarkChannel = struct { join2.join(); } - pub fn benchmarkPointerChannel() !void { + pub fn benchmarkPacketChannel() !void { const allocator = std.heap.page_allocator; - var channel = BlockPointerChannel.init(allocator, send_count / 2); + var channel = Channel(Packet).init(allocator, send_count / 2); defer channel.deinit(); - var recv_count: Atomic(usize) = Atomic(usize).init(0); - var join2 = try std.Thread.spawn(.{}, testPointerSender, .{ channel, send_count }); - var join1 = try std.Thread.spawn(.{}, testPointerReceiver, .{ channel, &recv_count, 1 }); + var join1 = try std.Thread.spawn(.{}, testPacketReceiver, .{ channel, send_count }); + var join2 = try std.Thread.spawn(.{}, testPacketSender, .{ channel, send_count }); + + join1.join(); + join2.join(); + } + + pub fn benchmarkPacketChannelBuffer() !void { + const allocator = std.heap.page_allocator; + var buffer = RingBuffer(Packet).init(allocator, send_count / 2); + defer buffer.deinit(); + + var join1 = try std.Thread.spawn(.{}, testPacketRecvBuffer, .{ buffer, send_count }); + var join2 = try std.Thread.spawn(.{}, testPacketSenderBuffer, .{ buffer, send_count }); + + join1.join(); + join2.join(); + 
} + + pub fn benchmarkPacketChannelBufferDrain() !void { + const allocator = std.heap.page_allocator; + var buffer = RingBuffer(Packet).init(allocator, send_count / 2); + defer buffer.deinit(); + + var join1 = try std.Thread.spawn(.{}, testPacketRecvBufferDrain, .{ buffer, send_count }); + var join2 = try std.Thread.spawn(.{}, testPacketSenderBuffer, .{ buffer, send_count }); + + join1.join(); + join2.join(); + } + + pub fn benchmarkPacketChannelBufferDrainV2() !void { + const allocator = std.heap.page_allocator; + var buffer = RingBufferV2(Packet).init(allocator, send_count / 2); + defer buffer.deinit(); + + var join1 = try std.Thread.spawn(.{}, testPacketRecvBufferDrainV2, .{ buffer, send_count }); + var join2 = try std.Thread.spawn(.{}, testPacketSenderBufferV2, .{ buffer, send_count }); join1.join(); join2.join(); From d78ae98dceff774169e664c2b5994761b3f7fce1 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 29 Sep 2023 13:03:41 -0400 Subject: [PATCH 19/72] fix --- src/gossip/packet.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gossip/packet.zig b/src/gossip/packet.zig index 1d7710c2a..6a70a56e9 100644 --- a/src/gossip/packet.zig +++ b/src/gossip/packet.zig @@ -23,7 +23,10 @@ pub const Packet = struct { pub fn default() Self { return .{ - .addr = network.EndPoint.default(), + .addr = .{ + .port = 0, + .address = .{ .ipv4 = network.Address.IPv4.any } + }, .data = undefined, .size = 0, }; From 1bcaaa1f320f27657949a4cdfe5163bdc8dc7d0d Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 3 Oct 2023 15:20:24 -0400 Subject: [PATCH 20/72] clean up --- .gitignore | 3 +- src/gossip/crds_table.zig | 16 +- src/gossip/gossip_service.zig | 132 +++++++--------- src/gossip/socket_utils.zig | 188 +--------------------- src/sync/channel.zig | 288 ---------------------------------- 5 files changed, 70 insertions(+), 557 deletions(-) diff --git a/.gitignore b/.gitignore index 4a0641ed7..83256d118 100644 --- 
a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ zig-cache/ -zig-out/ \ No newline at end of file +zig-out/ +data/ \ No newline at end of file diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 6e0fab6b0..1464764ba 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -587,18 +587,18 @@ pub const CrdsTable = struct { } pub fn callback(task: *Task) void { - var this = @fieldParentPtr(@This(), "task", task); - defer this.done.store(true, std.atomic.Ordering.Release); + var self = @fieldParentPtr(@This(), "task", task); + defer self.done.store(true, std.atomic.Ordering.Release); // get assocaited entries - const entry = this.crds_table.pubkey_to_values.getEntry(this.key).?; + const entry = self.crds_table.pubkey_to_values.getEntry(self.key).?; // if contact info is up to date then we dont need to check the values const pubkey = entry.key_ptr; const label = CrdsValueLabel{ .LegacyContactInfo = pubkey.* }; - if (this.crds_table.get(label)) |*contact_info| { + if (self.crds_table.get(label)) |*contact_info| { const value_timestamp = @min(contact_info.value.wallclock(), contact_info.timestamp_on_insertion); - if (value_timestamp > this.cutoff_timestamp) { + if (value_timestamp > self.cutoff_timestamp) { return; } } @@ -608,10 +608,10 @@ pub const CrdsTable = struct { const count = entry_indexs.count(); for (entry_indexs.iterator().keys[0..count]) |entry_index| { - const versioned_value = this.crds_table.store.values()[entry_index]; + const versioned_value = self.crds_table.store.values()[entry_index]; const value_timestamp = @min(versioned_value.value.wallclock(), versioned_value.timestamp_on_insertion); - if (value_timestamp <= this.cutoff_timestamp) { - this.old_labels.append(versioned_value.value.label()) catch unreachable; + if (value_timestamp <= self.cutoff_timestamp) { + self.old_labels.append(versioned_value.value.label()) catch unreachable; } } } diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig 
index 1adbf91c1..fc326efaf 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -55,14 +55,10 @@ const Hash = @import("../core/hash.zig").Hash; const socket_utils = @import("socket_utils.zig"); const Channel = @import("../sync/channel.zig").Channel; -const RingBuffer = @import("../sync/channel.zig").RingBuffer; -const RingBufferV2 = @import("../sync/channel.zig").RingBufferV2; const PacketChannel = Channel(Packet); const PacketBatchChannel = Channel(std.ArrayList(Packet)); -const ProtocolRingBuffer = RingBufferV2(ProtocolMessage); - const ProtocolMessage = struct { from_endpoint: EndPoint, message: Protocol }; const ProtocolChannel = Channel(ProtocolMessage); const PingCache = @import("./ping_pong.zig").PingCache; @@ -140,7 +136,6 @@ pub const GossipService = struct { var packet_incoming_channel = PacketBatchChannel.init(allocator, 10000); var packet_outgoing_channel = PacketBatchChannel.init(allocator, 10000); var verified_incoming_channel = ProtocolChannel.init(allocator, 10000); - // var verified_incoming_channel = ProtocolRingBuffer.init(allocator, 100_000); errdefer { packet_incoming_channel.deinit(); @@ -172,6 +167,7 @@ pub const GossipService = struct { var failed_pull_hashes = HashTimeQueue.init(allocator); var push_msg_q = std.ArrayList(CrdsValue).init(allocator); + // // TODO: figure out how to properly shut this guy down on exit // var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); return Self{ @@ -254,14 +250,7 @@ pub const GossipService = struct { // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); // defer self.joinAndExit(&ip_echo_server_listener_handle); - // var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ - // &self.gossip_socket, - // self.packet_incoming_channel, - // self.exit, - // self.logger, - // }); - - var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocketV2, .{ + var 
receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ self.allocator, &self.gossip_socket, self.packet_incoming_channel, @@ -279,8 +268,7 @@ pub const GossipService = struct { defer self.joinAndExit(&build_messages_handle); // outputer thread - // var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ - var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocketV2, .{ + var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ &self.gossip_socket, self.packet_outgoing_channel, self.exit, @@ -293,15 +281,7 @@ pub const GossipService = struct { // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); // defer self.joinAndExit(&ip_echo_server_listener_handle); - // var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ - // &self.gossip_socket, - // self.packet_incoming_channel, - // self.exit, - // self.logger, - // }); - // defer self.joinAndExit(&receiver_handle); - - var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocketV2, .{ + var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ self.allocator, &self.gossip_socket, self.packet_incoming_channel, @@ -316,8 +296,7 @@ pub const GossipService = struct { defer self.joinAndExit(&packet_handle); // outputer thread - // var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ - var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocketV2, .{ + var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ &self.gossip_socket, self.packet_outgoing_channel, self.exit, @@ -336,37 +315,37 @@ pub const GossipService = struct { done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), pub fn callback(task: *Task) void { - var this = @fieldParentPtr(@This(), "task", task); - defer this.done.store(true, std.atomic.Ordering.Release); + var self = @fieldParentPtr(@This(), "task", task); + defer self.done.store(true, 
std.atomic.Ordering.Release); var protocol_message = bincode.readFromSlice( - this.allocator, + self.allocator, Protocol, - this.packet.data[0..this.packet.size], + self.packet.data[0..self.packet.size], bincode.Params.standard, ) catch { return; }; protocol_message.sanitize() catch { - bincode.free(this.allocator, protocol_message); + bincode.free(self.allocator, protocol_message); return; }; protocol_message.verifySignature() catch { - bincode.free(this.allocator, protocol_message); + bincode.free(self.allocator, protocol_message); return; }; const msg = ProtocolMessage{ - .from_endpoint = this.packet.addr, - // TODO: remove this copy (its on the heap - should just need a ptr) + .from_endpoint = self.packet.addr, + // TODO: remove self copy (its on the heap - should just need a ptr) .message = protocol_message, }; - this.verified_incoming_channel.send(msg) catch unreachable; + self.verified_incoming_channel.send(msg) catch unreachable; // TODO: fix - // _ = this.verified_incoming_channel.push(msg); + // _ = self.verified_incoming_channel.push(msg); // while (!) 
{ // std.time.sleep(100); // } @@ -706,12 +685,12 @@ pub const GossipService = struct { self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); // std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); self.messages_processed.store(msg_count, std.atomic.Ordering.Release); - if (msg_count >= 30_000) { - // if (msg_count >= 1_000) { - std.debug.print("exiting...\n", .{}); - self.exit.store(true, std.atomic.Ordering.Unordered); - break; - } + // if (msg_count >= 30_000) { + // // if (msg_count >= 1_000) { + // std.debug.print("exiting...\n", .{}); + // self.exit.store(true, std.atomic.Ordering.Unordered); + // break; + // } } self.logger.debugf("process_messages loop closed\n", .{}); @@ -1091,12 +1070,12 @@ pub const GossipService = struct { } pub fn callback(task: *Task) void { - var this = @fieldParentPtr(@This(), "task", task); - defer this.done.store(true, std.atomic.Ordering.Release); + var self = @fieldParentPtr(@This(), "task", task); + defer self.done.store(true, std.atomic.Ordering.Release); - switch (this.value.data) { + switch (self.value.data) { .LegacyContactInfo => |*info| { - if (info.id.equals(this.my_pubkey)) { + if (info.id.equals(self.my_pubkey)) { // talking to myself == ignore return; } @@ -1105,15 +1084,15 @@ pub const GossipService = struct { else => return, } - const output_limit = this.output_limit.load(std.atomic.Ordering.Unordered); + const output_limit = self.output_limit.load(std.atomic.Ordering.Unordered); if (output_limit <= 0) { return; } const response_crds_values = pull_response.filterCrdsValues( - this.allocator, - this.crds_table, - this.filter, + self.allocator, + self.crds_table, + self.filter, crds.getWallclockMs(), @as(usize, @max(output_limit, 0)), ) catch { @@ -1122,16 +1101,16 @@ pub const GossipService = struct { }; defer response_crds_values.deinit(); - _ = this.output_limit.fetchSub( + _ = self.output_limit.fetchSub( @as(i64, @intCast(response_crds_values.items.len)), 
std.atomic.Ordering.Release, ); const maybe_packets = crdsValuesToPackets( - this.allocator, - this.my_pubkey, + self.allocator, + self.my_pubkey, response_crds_values.items, - this.from_endpoint, + self.from_endpoint, ChunkType.PullResponse, ) catch { return; @@ -1139,7 +1118,7 @@ pub const GossipService = struct { if (maybe_packets) |*packets| { defer packets.deinit(); - this.output.appendSlice(packets.items) catch unreachable; + self.output.appendSlice(packets.items) catch unreachable; } } }; @@ -2705,18 +2684,18 @@ const fuzz = @import("./fuzz.zig"); pub const BenchmarkGossipServiceGeneral = struct { pub const min_iterations = 1; - pub const max_iterations = 3; + pub const max_iterations = 1; pub const args = [_]usize{ 1_000, 5_000, - 10_000, + // 10_000, }; pub const arg_names = [_][]const u8{ "1k_msgs", "5k_msgs", - "10k_msg_iters", + // "10k_msg_iters", }; pub fn benchmarkGossipServiceProcessMessages(num_message_iterations: usize) !void { @@ -2752,7 +2731,7 @@ pub const BenchmarkGossipServiceGeneral = struct { }); // send incomign packets/messages - var outgoing_channel = Channel(Packet).init(allocator, 10_000); + var outgoing_channel = Channel(std.ArrayList(Packet)).init(allocator, 10_000); defer outgoing_channel.deinit(); var socket = UdpSocket.create(.ipv4, .udp) catch return error.SocketCreateFailed; @@ -2776,45 +2755,42 @@ pub const BenchmarkGossipServiceGeneral = struct { var sender_keypair = try KeyPair.create(null); var msg_sent: usize = 0; + while (msg_sent < num_message_iterations) { + var packet_output = try std.ArrayList(Packet).initCapacity(allocator, 10); + // send a ping message { var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); - try outgoing_channel.send(msg); + try packet_output.append(msg); msg_sent += 1; } // send a pong message { var msg = try fuzz.randomPongPacket(rng, &keypair, endpoint); - try outgoing_channel.send(msg); + try packet_output.append(msg); msg_sent += 1; } // send a push message { var packets = try 
fuzz.randomPushMessage(rng, &keypair, address.toEndpoint()); - defer packets.deinit(); - - for (packets.items) |packet| { - try outgoing_channel.send(packet); - msg_sent += 1; - } + try outgoing_channel.send(packets); + msg_sent += packets.items.len; } // send a pull response { var packets = try fuzz.randomPullResponse(rng, &keypair, address.toEndpoint()); - defer packets.deinit(); - - for (packets.items) |packet| { - try outgoing_channel.send(packet); - msg_sent += 1; - } + try outgoing_channel.send(packets); + msg_sent += packets.items.len; } // send a pull request { - var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); - try outgoing_channel.send(packet); + var msg = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); + try packet_output.append(msg); msg_sent += 1; } + + try outgoing_channel.send(packet_output); } // wait for all messages to be processed @@ -2829,7 +2805,11 @@ pub const BenchmarkGossipServiceGeneral = struct { // send a few more to make sure the socket exits for (0..5) |_| { var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); - try outgoing_channel.send(msg); + + var packet_output = try std.ArrayList(Packet).initCapacity(allocator, 1); + packet_output.appendAssumeCapacity(msg); + + try outgoing_channel.send(packet_output); } packet_handle.join(); diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index 45f9e7bfa..cb5d3bff2 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -9,45 +9,10 @@ pub const SOCKET_TIMEOUT: usize = 1000000; pub const PACKETS_PER_BATCH: usize = 64; pub fn readSocket( - socket: *UdpSocket, - incoming_channel: *Channel(Packet), - exit: *const std.atomic.Atomic(bool), - logger: Logger, -) error{ SocketClosed, SocketRecvError, OutOfMemory, ChannelClosed }!void { - var read_buf: [PACKET_DATA_SIZE]u8 = undefined; - var packets_read: u64 = 0; - - while (!exit.load(std.atomic.Ordering.Unordered)) { - 
const recv_meta = socket.receiveFrom(&read_buf) catch |err| { - if (err == error.WouldBlock) { - // std.time.sleep(std.time.ns_per_ms * 1); - continue; - } else { - logger.debugf("read_socket error: {s}\n", .{@errorName(err)}); - continue; - } - }; - - const bytes_read = recv_meta.numberOfBytes; - if (bytes_read == 0) { - logger.debugf("read_socket closed\n", .{}); - return error.SocketClosed; - } - packets_read +|= 1; - - // send packet through channel - const packet = Packet.init(recv_meta.sender, read_buf, bytes_read); - try incoming_channel.send(packet); - } - logger.debugf("read_socket loop closed\n", .{}); -} - -pub fn readSocketV2( allocator: std.mem.Allocator, socket: *UdpSocket, incoming_channel: *Channel(std.ArrayList(Packet)), exit: *const std.atomic.Atomic(bool), - // logger: Logger, ) !void { //Performance out of the IO without poll // * block on the socket until it's readable @@ -139,7 +104,7 @@ pub fn recvMmsg( return count; } -pub fn sendSocketV2( +pub fn sendSocket( socket: *UdpSocket, outgoing_channel: *Channel(std.ArrayList(Packet)), exit: *const std.atomic.Atomic(bool), @@ -176,36 +141,6 @@ pub fn sendSocketV2( logger.debugf("send_socket loop closed\n", .{}); } -pub fn sendSocket( - socket: *UdpSocket, - outgoing_channel: *Channel(Packet), - exit: *const std.atomic.Atomic(bool), - logger: Logger, -) error{ SocketSendError, OutOfMemory, ChannelClosed }!void { - var packets_sent: u64 = 0; - - while (!exit.load(std.atomic.Ordering.Unordered)) { - const maybe_packets = try outgoing_channel.try_drain(); - if (maybe_packets == null) { - // sleep for 1ms - // std.time.sleep(std.time.ns_per_ms * 1); - continue; - } - const packets = maybe_packets.?; - defer outgoing_channel.allocator.free(packets); - - for (packets) |p| { - const bytes_sent = socket.sendTo(p.addr, p.data[0..p.size]) catch |e| { - logger.debugf("send_socket error: {s}\n", .{@errorName(e)}); - continue; - }; - packets_sent +|= 1; - std.debug.assert(bytes_sent == p.size); - } - } - 
logger.debugf("send_socket loop closed\n", .{}); -} - pub const BenchmarkPacketProcessing = struct { pub const min_iterations = 3; pub const max_iterations = 5; @@ -221,45 +156,6 @@ pub const BenchmarkPacketProcessing = struct { pub fn benchmarkReadSocket(n_packets: usize) !void { const allocator = std.heap.page_allocator; - var channel = Channel(Packet).init(allocator, n_packets); - defer channel.deinit(); - - var socket = try UdpSocket.create(.ipv4, .udp); - try socket.bindToPort(0); - try socket.setReadTimeout(1000000); // 1 second - - const to_endpoint = try socket.getLocalEndPoint(); - - var exit = std.atomic.Atomic(bool).init(false); - - var handle = try std.Thread.spawn(.{}, readSocket, .{ &socket, channel, &exit, .noop }); - var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, n_packets }); - - var rand = std.rand.DefaultPrng.init(0); - var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - var timer = std.time.Timer.start() catch unreachable; - for (1..(n_packets * 2 + 1)) |i| { - rand.fill(&packet_buf); - _ = try socket.sendTo(to_endpoint, &packet_buf); - // 10Kb per second - // each packet is 1k bytes - // = 10 packets per second - if (i % 10 == 0) { - const elapsed = timer.read(); - if (elapsed < std.time.ns_per_s) { - std.time.sleep(std.time.ns_per_s - elapsed); - } - } - } - - recv_handle.join(); - exit.store(true, std.atomic.Ordering.Unordered); - handle.join(); - } - - pub fn benchmarkReadSocketV2(n_packets: usize) !void { - const allocator = std.heap.page_allocator; - var channel = Channel(std.ArrayList(Packet)).init(allocator, n_packets); defer channel.deinit(); @@ -271,8 +167,8 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); - var handle = try std.Thread.spawn(.{}, readSocketV2, .{ allocator, &socket, channel, &exit }); - var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecvV2, .{ channel, n_packets }); + var handle = try std.Thread.spawn(.{}, readSocket, .{ 
allocator, &socket, channel, &exit }); + var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, n_packets }); var rand = std.rand.DefaultPrng.init(0); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; @@ -297,41 +193,9 @@ pub const BenchmarkPacketProcessing = struct { exit.store(true, std.atomic.Ordering.Unordered); handle.join(); } - - pub fn benchmarkSendSocket(n_packets: usize) !void { - const allocator = std.heap.page_allocator; - - var channel = Channel(Packet).init(allocator, n_packets); - defer channel.deinit(); - - var socket = try UdpSocket.create(.ipv4, .udp); - try socket.bindToPort(0); - try socket.setReadTimeout(1000000); // 1 second - const to_endpoint = try socket.getLocalEndPoint(); - - var exit = std.atomic.Atomic(bool).init(false); - - var recv_handle = try std.Thread.spawn(.{}, benchmarkSocketRecv, .{ &socket, n_packets }); - - var handle = try std.Thread.spawn(.{}, sendSocket, .{ &socket, channel, &exit, .noop }); - var rand = std.rand.DefaultPrng.init(0); - var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - for (0..n_packets) |_| { - rand.fill(&packet_buf); - try channel.send(Packet.init( - to_endpoint, - packet_buf, - packet_buf.len, - )); - } - - recv_handle.join(); - exit.store(true, std.atomic.Ordering.Unordered); - handle.join(); - } }; -pub fn benchmarkChannelRecvV2( +pub fn benchmarkChannelRecv( channel: *Channel(std.ArrayList(Packet)), n_values_to_receive: usize, ) !void { @@ -348,47 +212,3 @@ pub fn benchmarkChannelRecvV2( } } } - -pub fn benchmarkChannelRecv( - channel: *Channel(Packet), - n_values_to_receive: usize, -) !void { - var count: usize = 0; - while (true) { - const values = (try channel.try_drain()) orelse { - continue; - }; - count += values.len; - if (count >= n_values_to_receive) { - break; - } - } -} - -pub fn benchmarkSocketRecv( - socket: *UdpSocket, - total: usize, -) !void { - var count: usize = 0; - var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - - while (true) { - const recv_meta = 
socket.receiveFrom(&packet_buf) catch |err| { - if (err == error.WouldBlock) { - continue; - } else { - return error.SocketRecvError; - } - }; - - const bytes_read = recv_meta.numberOfBytes; - if (bytes_read == 0) { - return error.SocketClosed; - } - - count += 1; - if (count == total) { - break; - } - } -} diff --git a/src/sync/channel.zig b/src/sync/channel.zig index 7f17c552a..7efcf856c 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -7,173 +7,6 @@ const assert = std.debug.assert; const Mux = @import("mux.zig").Mux; const Ordering = std.atomic.Ordering; -pub fn RingBufferV2(comptime T: type) type { - return struct { - buffer: []T, - index: usize, - count: Atomic(usize), - allocator: std.mem.Allocator, // just used for deinit - - const Self = @This(); - - pub fn init(allocator: std.mem.Allocator, capacity: usize) *Self { - std.debug.assert(capacity > 0); - var self = allocator.create(Self) catch unreachable; - const buffer = allocator.alloc(T, capacity) catch unreachable; - self.* = RingBufferV2(T){ - .buffer = buffer, - .index = 0, - .count = Atomic(usize).init(0), - .allocator = allocator, - }; - return self; - } - - pub fn deinit(self: *Self) void { - self.allocator.free(self.buffer); - } - - pub inline fn isFull(self: *const Self) bool { - return self.count.load(Ordering.Acquire) == self.buffer.len; - } - - pub inline fn isEmpty(self: *const Self) bool { - return self.count.load(Ordering.Acquire) == 0; - } - - // get next free pointer => fill it up => increment count - pub inline fn getNextFreePtr(self: *Self) ?*T { - if (self.isFull()) return null; - const count = self.count.load(Ordering.Acquire); - return &self.buffer[(self.index + count) % self.buffer.len]; - } - - pub inline fn incrementCount(self: *Self) void { - const count = self.count.fetchAdd(1, Ordering.Release); - _ = count; - } - - // get head pointer => process it => increment head - pub inline fn getHeadPtr(self: *Self) ?*T { - if (self.isEmpty()) return null; - return 
&self.buffer[self.index % self.buffer.len]; - } - - // higher level functions - pub inline fn push(self: *Self, value: T) bool { - if (self.getNextFreePtr()) |ptr| { - ptr.* = value; - _ = self.count.fetchAdd(1, Ordering.Release); - return true; - } else { - return false; - } - } - - pub inline fn try_drain(self: *Self) !?std.ArrayList(T) { - if (self.isEmpty()) return null; - - const count = self.count.load(Ordering.Acquire); - var items = try std.ArrayList(T).initCapacity(self.allocator, count); - for (0..count) |i| { - var ptr = &self.buffer[(self.index + i) % self.buffer.len]; - items.appendAssumeCapacity(ptr.*); - } - - _ = self.count.fetchSub(count, Ordering.Release); - self.index += count; - - return items; - } - }; -} - -pub fn RingBuffer(comptime T: type) type { - return struct { - buffer: []T, - index: usize, - count: Atomic(usize), - allocator: std.mem.Allocator, // just used for deinit - - const Self = @This(); - - pub fn init(allocator: std.mem.Allocator, capacity: usize) *Self { - std.debug.assert(capacity > 0); - var self = allocator.create(Self) catch unreachable; - const buffer = allocator.alloc(T, capacity) catch unreachable; - self.* = RingBuffer(T){ - .buffer = buffer, - .index = 0, - .count = Atomic(usize).init(0), - .allocator = allocator, - }; - return self; - } - - pub fn deinit(self: *Self) void { - self.allocator.free(self.buffer); - } - - pub fn isFull(self: *const Self) bool { - return self.count.load(Ordering.Acquire) == self.buffer.len; - } - - pub fn isEmpty(self: *const Self) bool { - return self.count.load(Ordering.Acquire) == 0; - } - - // get next free pointer => fill it up => increment count - pub inline fn getNextFreePtr(self: *Self) ?*T { - if (self.isFull()) return null; - const count = self.count.load(Ordering.Acquire); - return &self.buffer[(self.index + count) % self.buffer.len]; - } - - pub inline fn incrementCount(self: *Self) void { - const count = self.count.fetchAdd(1, Ordering.Release); - _ = count; - // 
std.debug.print("count incremented from {} -> {}\n", .{count, count+1}); - } - - // get head pointer => process it => increment head - pub inline fn getHeadPtr(self: *Self) ?*T { - if (self.isEmpty()) return null; - return &self.buffer[self.index % self.buffer.len]; - } - - pub inline fn consumeAmount(self: *Self, amount: usize) void { - const count = self.count.fetchSub(amount, Ordering.Release); - // std.debug.print("consuming {}: count: {}->{} new index: {}->{}\n", .{amount, count, count - amount, self.index, self.index+amount}); - _ = count; - self.index += amount; - } - - // higher level functions - pub fn push(self: *Self, value: T) bool { - if (self.getNextFreePtr()) |ptr| { - ptr.* = value; - self.incrementCount(); - return true; - } else { - return false; - } - } - - pub fn try_drain(self: *const Self) !?std.ArrayList(*T) { - if (self.isEmpty()) return null; - - const count = self.count.load(Ordering.Acquire); - // std.debug.print("reading: {} -> {} (count = {})\n", .{self.index, self.index + count, count}); - var items = try std.ArrayList(*T).initCapacity(self.allocator, count); - for (0..count) |i| { - var ptr = &self.buffer[(self.index + i) % self.buffer.len]; - items.appendAssumeCapacity(ptr); - } - return items; - } - }; -} - /// A very basic mpmc channel implementation - TODO: replace with a legit channel impl pub fn Channel(comptime T: type) type { return struct { @@ -330,91 +163,6 @@ fn testPacketReceiver(chan: *Channel(Packet), total_recv: usize) void { } } -fn testPacketSenderBuffer(ring_buffer: *RingBuffer(Packet), total_send: usize) void { - var i: usize = 0; - while (i < total_send) { - var packet = Packet.default(); - packet.data[2] = @as(u8, @truncate(i)); - if (ring_buffer.push(packet)) { - i += 1; - } - } -} - -fn testPacketRecvBuffer(ring_buffer: *RingBuffer(Packet), total_recv: usize) void { - var count: usize = 0; - while (count < total_recv) { - if (ring_buffer.getHeadPtr()) |head| { - defer ring_buffer.consumeAmount(1); - _ = head; - 
count += 1; - // std.debug.print("recv count: {}/{} \n", .{count, total_recv}); - } - } -} - -fn testPacketRecvBufferDrain(ring_buffer: *RingBuffer(Packet), total_recv: usize) void { - var count: usize = 0; - while (count < total_recv) { - if (ring_buffer.try_drain() catch unreachable) |ptrs| { - for (ptrs.items) |ptr| { - // std.debug.print("{any}", .{ptr.*.data[2]}); - _ = ptr; - count += 1; - } - defer { - ptrs.deinit(); - ring_buffer.consumeAmount(ptrs.items.len); - } - } - } -} - -fn testPacketSenderBufferV2(ring_buffer: *RingBufferV2(Packet), total_send: usize) void { - var i: usize = 0; - while (i < total_send) { - var packet = Packet.default(); - packet.data[2] = @as(u8, @truncate(i)); - if (ring_buffer.push(packet)) { - i += 1; - } - } -} - -fn testPacketRecvBufferDrainV2(ring_buffer: *RingBufferV2(Packet), total_recv: usize) void { - var count: usize = 0; - while (count < total_recv) { - if (ring_buffer.try_drain() catch unreachable) |v| { - for (v.items) |val| { - // std.debug.print("{any}", .{val.data[2]}); - _ = val; - count += 1; - } - v.deinit(); - } - } -} - -fn testPointerSender(chan: *BlockPointerChannel, total_send: usize) void { - var allocator = chan.allocator; - var i: usize = 0; - while (i < total_send) : (i += 1) { - var block_ptr = allocator.create(Block) catch unreachable; - block_ptr.* = Block{ .num = @intCast(i) }; - chan.send(block_ptr) catch unreachable; - } - chan.close(); -} - -fn testPointerReceiver(chan: *BlockPointerChannel, recv_count: *Atomic(usize), id: u8) void { - var allocator = chan.allocator; - _ = id; - while (chan.receive()) |v| { - _ = recv_count.fetchAdd(1, .SeqCst); - allocator.destroy(v); - } -} - test "sync.channel: channel works properly" { var ch = BlockChannel.init(testing.allocator, 100); defer ch.deinit(); @@ -460,40 +208,4 @@ pub const BenchmarkChannel = struct { join1.join(); join2.join(); } - - pub fn benchmarkPacketChannelBuffer() !void { - const allocator = std.heap.page_allocator; - var buffer = 
RingBuffer(Packet).init(allocator, send_count / 2); - defer buffer.deinit(); - - var join1 = try std.Thread.spawn(.{}, testPacketRecvBuffer, .{ buffer, send_count }); - var join2 = try std.Thread.spawn(.{}, testPacketSenderBuffer, .{ buffer, send_count }); - - join1.join(); - join2.join(); - } - - pub fn benchmarkPacketChannelBufferDrain() !void { - const allocator = std.heap.page_allocator; - var buffer = RingBuffer(Packet).init(allocator, send_count / 2); - defer buffer.deinit(); - - var join1 = try std.Thread.spawn(.{}, testPacketRecvBufferDrain, .{ buffer, send_count }); - var join2 = try std.Thread.spawn(.{}, testPacketSenderBuffer, .{ buffer, send_count }); - - join1.join(); - join2.join(); - } - - pub fn benchmarkPacketChannelBufferDrainV2() !void { - const allocator = std.heap.page_allocator; - var buffer = RingBufferV2(Packet).init(allocator, send_count / 2); - defer buffer.deinit(); - - var join1 = try std.Thread.spawn(.{}, testPacketRecvBufferDrainV2, .{ buffer, send_count }); - var join2 = try std.Thread.spawn(.{}, testPacketSenderBufferV2, .{ buffer, send_count }); - - join1.join(); - join2.join(); - } }; From d7b7b23280d2eab1c8a7337b07c32ca858c6391a Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 4 Oct 2023 10:56:34 -0400 Subject: [PATCH 21/72] fix --- src/benchmarks.zig | 6 - src/bincode/bincode.zig | 8 +- src/gossip/gossip_service.zig | 312 ++++++++++++---------------------- src/gossip/packet.zig | 5 +- src/gossip/socket_utils.zig | 6 +- src/sync/channel.zig | 9 +- 6 files changed, 128 insertions(+), 218 deletions(-) diff --git a/src/benchmarks.zig b/src/benchmarks.zig index a6dd6781a..1292ae997 100644 --- a/src/benchmarks.zig +++ b/src/benchmarks.zig @@ -45,12 +45,6 @@ pub fn main() !void { max_time_per_bench, TimeUnits.milliseconds, ); - - // try benchmark( - // @import("gossip/gossip_service.zig").BenchmarkGossipServicePullRequest, - // max_time_per_bench, - // TimeUnits.milliseconds, - // ); } if (std.mem.startsWith(u8, 
"sync", filter)) { diff --git a/src/bincode/bincode.zig b/src/bincode/bincode.zig index 65a11bc4d..9427072d6 100644 --- a/src/bincode/bincode.zig +++ b/src/bincode/bincode.zig @@ -166,7 +166,7 @@ pub fn Deserializer(comptime Reader: type) type { .Struct => |*info| { inline for (info.fields) |field| { // std.debug.print("freeing {s} on {s}\n", .{ field.name, @typeName(T) }); - if (get_field_config(T, field)) |config| { + if (getFieldConfig(T, field)) |config| { if (config.free) |free_fcn| { // std.debug.print("found free fcn...\n", .{}); var field_value = @field(value, field.name); @@ -229,7 +229,7 @@ pub fn Deserializer(comptime Reader: type) type { inline for (info.fields) |field| { if (!field.is_comptime) { - if (get_field_config(T, field)) |config| { + if (getFieldConfig(T, field)) |config| { if (shouldUseDefaultValue(field, config)) |default_val| { @field(data, field.name) = @as(*const field.type, @ptrCast(@alignCast(default_val))).*; continue; @@ -446,7 +446,7 @@ pub fn Serializer( inline for (info.fields) |field| { if (!field.is_comptime) { - if (get_field_config(T, field)) |config| { + if (getFieldConfig(T, field)) |config| { if (config.skip) { continue; } @@ -517,7 +517,7 @@ pub fn FieldConfig(comptime T: type) type { }; } -pub fn get_field_config(comptime struct_type: type, comptime field: std.builtin.Type.StructField) ?FieldConfig(field.type) { +pub fn getFieldConfig(comptime struct_type: type, comptime field: std.builtin.Type.StructField) ?FieldConfig(field.type) { const bincode_field = "!bincode-config:" ++ field.name; if (@hasDecl(struct_type, bincode_field)) { const config = @field(struct_type, bincode_field); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index fc326efaf..476f175ac 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -8,6 +8,7 @@ const PACKET_DATA_SIZE = @import("packet.zig").PACKET_DATA_SIZE; const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; const Task = 
ThreadPool.Task; const Batch = ThreadPool.Batch; +const ArrayList = std.ArrayList; const Thread = std.Thread; const AtomicBool = std.atomic.Atomic(bool); @@ -56,10 +57,12 @@ const socket_utils = @import("socket_utils.zig"); const Channel = @import("../sync/channel.zig").Channel; +const PacketBatch = ArrayList(Packet); const PacketChannel = Channel(Packet); -const PacketBatchChannel = Channel(std.ArrayList(Packet)); +const PacketBatchChannel = Channel(PacketBatch); const ProtocolMessage = struct { from_endpoint: EndPoint, message: Protocol }; + const ProtocolChannel = Channel(ProtocolMessage); const PingCache = @import("./ping_pong.zig").PingCache; const PingAndSocketAddr = @import("./ping_pong.zig").PingAndSocketAddr; @@ -110,15 +113,15 @@ pub const GossipService = struct { crds_table_rw: RwMux(CrdsTable), // push message things active_set_rw: RwMux(ActiveSet), - push_msg_queue_mux: Mux(std.ArrayList(CrdsValue)), + push_msg_queue_mux: Mux(ArrayList(CrdsValue)), // pull message things failed_pull_hashes_mux: Mux(HashTimeQueue), - entrypoints: std.ArrayList(SocketAddr), + entrypoints: ArrayList(SocketAddr), ping_cache_rw: RwMux(PingCache), logger: Logger, thread_pool: *ThreadPool, - echo_server: ?echo.Server, + echo_server: echo.Server, // used for benchmarking messages_processed: std.atomic.Atomic(usize) = std.atomic.Atomic(usize).init(0), @@ -129,7 +132,7 @@ pub const GossipService = struct { allocator: std.mem.Allocator, my_contact_info: crds.LegacyContactInfo, my_keypair: KeyPair, - entrypoints: ?std.ArrayList(SocketAddr), + entrypoints: ?ArrayList(SocketAddr), exit: *AtomicBool, logger: Logger, ) error{ OutOfMemory, SocketCreateFailed, SocketBindFailed, SocketSetTimeoutFailed }!Self { @@ -165,10 +168,10 @@ pub const GossipService = struct { gossip_socket.setReadTimeout(socket_utils.SOCKET_TIMEOUT) catch return error.SocketSetTimeoutFailed; // 1 second var failed_pull_hashes = HashTimeQueue.init(allocator); - var push_msg_q = 
std.ArrayList(CrdsValue).init(allocator); + var push_msg_q = ArrayList(CrdsValue).init(allocator); - // // TODO: figure out how to properly shut this guy down on exit - // var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); + // TODO: figure out how to properly shut this guy down on exit + var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); return Self{ .my_contact_info = my_contact_info, @@ -182,10 +185,10 @@ pub const GossipService = struct { .verified_incoming_channel = verified_incoming_channel, .crds_table_rw = crds_table_rw, .allocator = allocator, - .push_msg_queue_mux = Mux(std.ArrayList(CrdsValue)).init(push_msg_q), + .push_msg_queue_mux = Mux(ArrayList(CrdsValue)).init(push_msg_q), .active_set_rw = RwMux(ActiveSet).init(active_set), .failed_pull_hashes_mux = Mux(HashTimeQueue).init(failed_pull_hashes), - .entrypoints = entrypoints orelse std.ArrayList(SocketAddr).init(allocator), + .entrypoints = entrypoints orelse ArrayList(SocketAddr).init(allocator), .ping_cache_rw = RwMux(PingCache).init( try PingCache.init( allocator, @@ -194,7 +197,7 @@ pub const GossipService = struct { GOSSIP_PING_CACHE_CAPACITY, ), ), - .echo_server = null, + .echo_server = echo_server, .logger = logger, .thread_pool = thread_pool, }; @@ -213,13 +216,30 @@ pub const GossipService = struct { } pub fn deinit(self: *Self) void { - // self.echo_server.deinit(); - + self.echo_server.deinit(); self.gossip_socket.close(); - self.packet_incoming_channel.deinit(); - self.packet_outgoing_channel.deinit(); - self.verified_incoming_channel.deinit(); + { + var buff_lock = self.packet_incoming_channel.buffer.lock(); + var buff: *std.ArrayList(PacketBatch) = buff_lock.mut(); + for (buff.items) |*item| item.deinit(); + buff_lock.unlock(); + self.packet_incoming_channel.deinit(); + } + { + var buff_lock = self.packet_outgoing_channel.buffer.lock(); + var buff: *std.ArrayList(PacketBatch) = buff_lock.mut(); + for 
(buff.items) |*item| item.deinit(); + buff_lock.unlock(); + self.packet_outgoing_channel.deinit(); + } + { + var buff_lock = self.verified_incoming_channel.buffer.lock(); + var buff: *std.ArrayList(ProtocolMessage) = buff_lock.mut(); + for (buff.items) |*item| bincode.free(self.allocator, &item.message); + buff_lock.unlock(); + self.verified_incoming_channel.deinit(); + } self.entrypoints.deinit(); @@ -247,8 +267,8 @@ pub const GossipService = struct { /// 4) build message loop (to send outgoing message) /// and 5) a socket responder (to send outgoing packets) pub fn run(self: *Self) !void { - // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); - // defer self.joinAndExit(&ip_echo_server_listener_handle); + var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); + defer self.joinAndExit(&ip_echo_server_listener_handle); var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ self.allocator, @@ -278,8 +298,8 @@ pub const GossipService = struct { } pub fn runSpy(self: *Self) !void { - // var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); - // defer self.joinAndExit(&ip_echo_server_listener_handle); + var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); + defer self.joinAndExit(&ip_echo_server_listener_handle); var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ self.allocator, @@ -309,7 +329,6 @@ pub const GossipService = struct { packet: *const Packet, allocator: std.mem.Allocator, verified_incoming_channel: *Channel(ProtocolMessage), - // verified_incoming_channel: *ProtocolRingBuffer, task: Task, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), @@ -343,12 +362,6 @@ pub const GossipService = struct { .message = protocol_message, }; self.verified_incoming_channel.send(msg) catch 
unreachable; - - // TODO: fix - // _ = self.verified_incoming_channel.push(msg); - // while (!) { - // std.time.sleep(100); - // } } }; @@ -415,6 +428,7 @@ pub const GossipService = struct { self.logger.debugf("verify_packets loop closed\n", .{}); } + // structs used in process_messages loop pub const PullRequestMessage = struct { filter: CrdsFilter, value: CrdsValue, @@ -437,43 +451,40 @@ pub const GossipService = struct { from_endpoint: *EndPoint, }; - pub const PullResponseMessage = struct { crds_values: []CrdsValue, from_pubkey: *Pubkey }; + pub const PullResponseMessage = struct { + crds_values: []CrdsValue, + from_pubkey: *Pubkey, + }; /// main logic for recieving and processing `Protocol` messages. pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; var msg_count: usize = 0; - const init_message_size = socket_utils.PACKETS_PER_BATCH; - - // // batching messages can lead to 1) less lock contention and 2) use of packetbatch which - // // are pre-allocated packets for responses 3) processing messages in parallel - - // batch so we can process in parallel - var pull_requests = try std.ArrayList(PullRequestMessage).initCapacity(self.allocator, init_message_size); - defer pull_requests.deinit(); - // batch so we can reduce the ping_cache locks - var pong_messages = try std.ArrayList(PongMessage).initCapacity(self.allocator, init_message_size); - defer pong_messages.deinit(); + // we batch messages bc: + // 1) less lock contention + // 2) can use packetbatchs (ie, pre-allocated packets) + // 3) processing read-heavy messages in parallel (specifically pull-requests) - // batch so we can respond with a packet batch - var ping_messages = try std.ArrayList(PingMessage).initCapacity(self.allocator, init_message_size); - defer ping_messages.deinit(); + const init_capacity = socket_utils.PACKETS_PER_BATCH; + var push_messages = try ArrayList(PushMessage).initCapacity(self.allocator, init_capacity); + var pull_requests = try 
ArrayList(PullRequestMessage).initCapacity(self.allocator, init_capacity); + var pull_response_messages = try ArrayList(PullResponseMessage).initCapacity(self.allocator, init_capacity); + var ping_messages = try ArrayList(PingMessage).initCapacity(self.allocator, init_capacity); + var pong_messages = try ArrayList(PongMessage).initCapacity(self.allocator, init_capacity); + var prune_messages = try ArrayList(*PruneData).initCapacity(self.allocator, init_capacity); - var push_messages = try std.ArrayList(PushMessage).initCapacity(self.allocator, init_message_size); - defer push_messages.deinit(); - - var pull_response_messages = try std.ArrayList(PullResponseMessage).initCapacity(self.allocator, init_message_size); - defer pull_response_messages.deinit(); - - var prune_messages = try std.ArrayList(*PruneData).initCapacity(self.allocator, init_message_size); - defer prune_messages.deinit(); + defer { + pull_response_messages.deinit(); + ping_messages.deinit(); + pong_messages.deinit(); + prune_messages.deinit(); + pull_requests.deinit(); + push_messages.deinit(); + } while (!self.exit.load(std.atomic.Ordering.Unordered)) { - // var drain_timer = std.time.Timer.start() catch unreachable; const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); - // const drain_elapsed = drain_timer.read(); - // self.logger.debugf("handle batch msg_drain took {} with {} items\n", .{ drain_elapsed, 1 }); if (maybe_protocol_messages == null) { // // sleep for 1ms @@ -487,26 +498,15 @@ pub const GossipService = struct { const protocol_messages = maybe_protocol_messages.?; defer self.verified_incoming_channel.allocator.free(protocol_messages); - // defer { - // // self.verified_incoming_channel.consumeAmount(protocol_messages.items.len); - // protocol_messages.deinit(); - // } msg_count += protocol_messages.len; // TODO: filter messages based on_shred_version for (protocol_messages) |*protocol_message| { - var from_endpoint: EndPoint = protocol_message.from_endpoint; 
switch (protocol_message.message) { .PushMessage => |*push| { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - self.logger.debugf("handle batch push took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); - } - try push_messages.append(PushMessage{ .crds_values = push[1], .from_pubkey = &push[0], @@ -514,19 +514,12 @@ pub const GossipService = struct { }); }, .PullResponse => |*pull| { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - self.logger.debugf("handle batch pull_resp took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); - } - try pull_response_messages.append(PullResponseMessage{ .from_pubkey = &pull[0], .crds_values = pull[1], }); }, .PullRequest => |*pull| { - try pull_requests.append(.{ .filter = pull[0], .value = pull[1], @@ -617,7 +610,7 @@ pub const GossipService = struct { // init a new batch of responses // TODO: figure out a way to re-use this allocation instead of freeing after responder sends it - var ping_packet_batch = try std.ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); + var ping_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); // TODO: add back logging @@ -669,7 +662,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; defer { const elapsed = x_timer.read(); - self.logger.debugf("handle batch crds_trim took {} with {} items @{}\n", .{ elapsed, 1, msg_count}); + self.logger.debugf("handle batch crds_trim took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); } var crds_table_lock = self.crds_table_rw.write(); @@ -685,8 +678,8 @@ pub const GossipService = struct { self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); // std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); 
self.messages_processed.store(msg_count, std.atomic.Ordering.Release); - // if (msg_count >= 30_000) { - // // if (msg_count >= 1_000) { + // if (msg_count >= 30_000) { + // // if (msg_count >= 1_000) { // std.debug.print("exiting...\n", .{}); // self.exit.store(true, std.atomic.Ordering.Unordered); // break; @@ -757,7 +750,7 @@ pub const GossipService = struct { { var push_msg_queue_lock = self.push_msg_queue_mux.lock(); defer push_msg_queue_lock.unlock(); - var push_msg_queue: *std.ArrayList(CrdsValue) = push_msg_queue_lock.mut(); + var push_msg_queue: *ArrayList(CrdsValue) = push_msg_queue_lock.mut(); try push_msg_queue.append(my_contact_info_value); } @@ -810,7 +803,7 @@ pub const GossipService = struct { /// logic for building new push messages which are sent to peers from the /// active set and serialized into packets. - fn buildPushMessages(self: *Self, push_cursor: *u64) !?std.ArrayList(Packet) { + fn buildPushMessages(self: *Self, push_cursor: *u64) !?ArrayList(Packet) { // TODO: find a better static value? 
var buf: [512]crds.CrdsVersionedValue = undefined; @@ -832,7 +825,7 @@ pub const GossipService = struct { // find new values in crds table // TODO: benchmark different approach of HashMapping(origin, value) first // then deriving the active set per origin in a batch - var push_messages = std.AutoHashMap(EndPoint, std.ArrayList(CrdsValue)).init(self.allocator); + var push_messages = std.AutoHashMap(EndPoint, ArrayList(CrdsValue)).init(self.allocator); defer { var push_iter = push_messages.iterator(); while (push_iter.next()) |push_entry| { @@ -880,7 +873,7 @@ pub const GossipService = struct { if (maybe_peer_entry) |peer_entry| { try peer_entry.value_ptr.append(value); } else { - var peer_entry = try std.ArrayList(CrdsValue).initCapacity(self.allocator, 1); + var peer_entry = try ArrayList(CrdsValue).initCapacity(self.allocator, 1); peer_entry.appendAssumeCapacity(value); try push_messages.put(peer, peer_entry); } @@ -894,12 +887,12 @@ pub const GossipService = struct { const num_values_not_considered = crds_entries.len - num_values_considered; push_cursor.* -= num_values_not_considered; - var packets = std.ArrayList(Packet).init(self.allocator); + var packets = ArrayList(Packet).init(self.allocator); errdefer packets.deinit(); var push_iter = push_messages.iterator(); while (push_iter.next()) |push_entry| { - const crds_values: *const std.ArrayList(CrdsValue) = push_entry.value_ptr; + const crds_values: *const ArrayList(CrdsValue) = push_entry.value_ptr; const to_endpoint: *const EndPoint = push_entry.key_ptr; // send the values as a pull response @@ -925,7 +918,7 @@ pub const GossipService = struct { self: *Self, /// the bloomsize of the pull request's filters bloom_size: usize, - ) !std.ArrayList(Packet) { + ) !ArrayList(Packet) { // get nodes from crds table var buf: [MAX_NUM_PULL_REQUESTS]crds.LegacyContactInfo = undefined; const now = getWallclockMs(); @@ -1004,7 +997,7 @@ pub const GossipService = struct { defer pull_request.deinitCrdsFilters(&filters); // 
build packet responses - var output = try std.ArrayList(Packet).initCapacity(self.allocator, filters.items.len); + var output = try ArrayList(Packet).initCapacity(self.allocator, filters.items.len); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; // update wallclock and sign @@ -1044,7 +1037,7 @@ pub const GossipService = struct { fn handleBatchPullRequest( self: *Self, - pull_requests: std.ArrayList(PullRequestMessage), + pull_requests: ArrayList(PullRequestMessage), ) void { // self.handleBatchPullRequestSequential(pull_requests) catch {}; self.handleBatchPullRequestParallel(pull_requests) catch |err| { @@ -1059,7 +1052,7 @@ pub const GossipService = struct { filter: *CrdsFilter, value: *CrdsValue, crds_table: *const CrdsTable, - output: std.ArrayList(Packet), + output: ArrayList(Packet), output_limit: *std.atomic.Atomic(i64), task: Task, @@ -1125,7 +1118,7 @@ pub const GossipService = struct { fn handleBatchPullRequestParallel( self: *Self, - pull_requests: std.ArrayList(PullRequestMessage), + pull_requests: ArrayList(PullRequestMessage), ) !void { // TODO: parallelize this @@ -1144,7 +1137,7 @@ pub const GossipService = struct { } const n_requests = pull_requests.items.len; - var valid_indexs = try std.ArrayList(usize).initCapacity(self.allocator, n_requests); + var valid_indexs = try ArrayList(usize).initCapacity(self.allocator, n_requests); defer valid_indexs.deinit(); { @@ -1153,7 +1146,7 @@ pub const GossipService = struct { var ping_cache: *PingCache = ping_cache_lock.mut(); // TODO: only allocate this once - var ping_packets = try std.ArrayList(Packet).initCapacity(self.allocator, n_requests); + var ping_packets = try ArrayList(Packet).initCapacity(self.allocator, n_requests); var count: usize = 0; for (pull_requests.items, 0..) 
|req, i| { @@ -1198,7 +1191,7 @@ pub const GossipService = struct { // create the pull requests const n_valid_requests = valid_indexs.items.len; - var tasks = try std.ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); + var tasks = try ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); defer { for (tasks.items) |task| { self.allocator.destroy(task); @@ -1216,7 +1209,7 @@ pub const GossipService = struct { for (valid_indexs.items) |i| { // TODO: pre-allocate these tasks // create the thread task - var output = std.ArrayList(Packet).init(self.allocator); + var output = ArrayList(Packet).init(self.allocator); var task = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, .my_pubkey = &self.my_pubkey, @@ -1257,7 +1250,7 @@ pub const GossipService = struct { fn handleBatchPullRequestSequential( self: *Self, - pull_requests: std.ArrayList(PullRequestMessage), + pull_requests: ArrayList(PullRequestMessage), ) !void { for (pull_requests.items) |*pr| { const maybe_resp_packets = try self.handlePullRequest( @@ -1287,7 +1280,7 @@ pub const GossipService = struct { pull_from_endpoint: EndPoint, // logging maybe_log_entry: ?Entry, - ) error{ SerializationError, OutOfMemory, ChannelClosed }!?std.ArrayList(Packet) { + ) error{ SerializationError, OutOfMemory, ChannelClosed }!?ArrayList(Packet) { const now = getWallclockMs(); { @@ -1376,7 +1369,7 @@ pub const GossipService = struct { pub fn handleBatchPullResponses( self: *Self, - pull_response_messages: *const std.ArrayList(PullResponseMessage), + pull_response_messages: *const ArrayList(PullResponseMessage), logger: Logger, ) !void { if (pull_response_messages.items.len == 0) { @@ -1385,7 +1378,7 @@ pub const GossipService = struct { _ = logger; const now = getWallclockMs(); - var failed_insert_ptrs = std.ArrayList(*CrdsValue).init(self.allocator); + var failed_insert_ptrs = ArrayList(*CrdsValue).init(self.allocator); { var crds_table_lock = 
self.crds_table_rw.write(); var crds_table: *CrdsTable = crds_table_lock.mut(); @@ -1525,7 +1518,7 @@ pub const GossipService = struct { pub fn handleBatchPruneMessages( self: *Self, - prune_messages: *const std.ArrayList(*PruneData), + prune_messages: *const ArrayList(*PruneData), ) !void { var active_set_lock = self.active_set_rw.write(); defer active_set_lock.unlock(); @@ -1587,7 +1580,7 @@ pub const GossipService = struct { failed_origins: *const std.AutoArrayHashMap(Pubkey, void), /// the pubkey of the node which we will send the prune message to prune_destination: Pubkey, - ) error{ CantFindContactInfo, InvalidGossipAddress, OutOfMemory, SignatureError }!std.ArrayList(Packet) { + ) error{ CantFindContactInfo, InvalidGossipAddress, OutOfMemory, SignatureError }!ArrayList(Packet) { const from_contact_info = blk: { var crds_table_lock = self.crds_table_rw.read(); defer crds_table_lock.unlock(); @@ -1603,7 +1596,7 @@ pub const GossipService = struct { const failed_origin_len = failed_origins.keys().len; var n_packets = failed_origins.keys().len / MAX_PRUNE_DATA_NODES; - var prune_packets = try std.ArrayList(Packet).initCapacity(self.allocator, n_packets); + var prune_packets = try ArrayList(Packet).initCapacity(self.allocator, n_packets); errdefer prune_packets.deinit(); const now = getWallclockMs(); @@ -1637,7 +1630,7 @@ pub const GossipService = struct { pub fn handleBatchPushMessages( self: *Self, - batch_push_messages: *const std.ArrayList(PushMessage), + batch_push_messages: *const ArrayList(PushMessage), logger: Logger, ) !void { if (batch_push_messages.items.len == 0) { @@ -1720,7 +1713,7 @@ pub const GossipService = struct { var n_packets = pubkey_to_failed_origins_iter.len; if (n_packets == 0) return; - var prune_packet_batch = try std.ArrayList(Packet).initCapacity(self.allocator, n_packets); + var prune_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_packets); prune_packet_batch.appendNTimesAssumeCapacity(Packet.default(), 
n_packets); var count: usize = 0; @@ -1851,7 +1844,7 @@ pub const GossipService = struct { ) void { var push_msg_queue_lock = self.push_msg_queue_mux.lock(); defer push_msg_queue_lock.unlock(); - var push_msg_queue: *std.ArrayList(CrdsValue) = push_msg_queue_lock.mut(); + var push_msg_queue: *ArrayList(CrdsValue) = push_msg_queue_lock.mut(); var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); @@ -1865,7 +1858,7 @@ pub const GossipService = struct { /// serializes a list of ping messages into Packets and sends them out pub fn sendPings( self: *Self, - pings: std.ArrayList(PingAndSocketAddr), + pings: ArrayList(PingAndSocketAddr), ) error{ OutOfMemory, ChannelClosed, SerializationError }!void { var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; @@ -1955,7 +1948,7 @@ pub fn crdsValuesToPackets( crds_values: []CrdsValue, to_endpoint: *const EndPoint, chunk_type: ChunkType, -) error{ OutOfMemory, SerializationError }!?std.ArrayList(Packet) { +) error{ OutOfMemory, SerializationError }!?ArrayList(Packet) { if (crds_values.len == 0) return null; const indexs = try chunkValuesIntoPacketIndexs( @@ -1967,7 +1960,7 @@ pub fn crdsValuesToPackets( var chunk_iter = std.mem.window(usize, indexs.items, 2, 1); var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; - var packets = try std.ArrayList(Packet).initCapacity(allocator, indexs.items.len -| 1); + var packets = try ArrayList(Packet).initCapacity(allocator, indexs.items.len -| 1); errdefer packets.deinit(); while (chunk_iter.next()) |window| { @@ -1993,8 +1986,8 @@ pub fn chunkValuesIntoPacketIndexs( allocator: std.mem.Allocator, crds_values: []CrdsValue, max_chunk_bytes: usize, -) error{ OutOfMemory, SerializationError }!std.ArrayList(usize) { - var packet_indexs = try std.ArrayList(usize).initCapacity(allocator, 1); +) error{ OutOfMemory, SerializationError }!ArrayList(usize) { + var packet_indexs = try ArrayList(usize).initCapacity(allocator, 1); errdefer packet_indexs.deinit(); 
packet_indexs.appendAssumeCapacity(0); @@ -2050,7 +2043,7 @@ test "gossip.gossip_service: tests handle_prune_messages" { // add some peers var lg = gossip_service.crds_table_rw.write(); - var peers = std.ArrayList(crds.LegacyContactInfo).init(allocator); + var peers = ArrayList(crds.LegacyContactInfo).init(allocator); defer peers.deinit(); for (0..10) |_| { var rand_keypair = try KeyPair.create(null); @@ -2227,7 +2220,7 @@ test "gossip.gossip_service: tests handle_pull_request" { defer packets.?.deinit(); try std.testing.expect(packets.?.items.len > 0); - var batch_requests = std.ArrayList(GossipService.PullRequestMessage).init(allocator); + var batch_requests = ArrayList(GossipService.PullRequestMessage).init(allocator); defer batch_requests.deinit(); var from_endpoint = addr.toEndpoint(); @@ -2265,7 +2258,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { defer gossip_service.deinit(); var push_from = Pubkey.random(rng.random(), .{}); - var values = std.ArrayList(CrdsValue).init(allocator); + var values = ArrayList(CrdsValue).init(allocator); defer values.deinit(); for (0..10) |_| { var value = try CrdsValue.randomWithIndex(rng.random(), &my_keypair, 0); @@ -2382,7 +2375,7 @@ test "gossip.gossip_service: test build_push_messages" { defer gossip_service.deinit(); // add some peers - var peers = std.ArrayList(crds.LegacyContactInfo).init(allocator); + var peers = ArrayList(crds.LegacyContactInfo).init(allocator); defer peers.deinit(); var lg = gossip_service.crds_table_rw.write(); for (0..10) |_| { @@ -2477,13 +2470,13 @@ test "gossip.gossip_service: test packet verification" { var buf = [_]u8{0} ** PACKET_DATA_SIZE; var out = try bincode.writeToSlice(buf[0..], protocol_msg, bincode.Params{}); var packet = Packet.init(from, buf, out.len); - var packet_batch = std.ArrayList(Packet).init(allocator); + var packet_batch = ArrayList(Packet).init(allocator); for (0..3) |_| { try packet_batch.append(packet); } try 
packet_channel.send(packet_batch); - var packet_batch_2 = std.ArrayList(Packet).init(allocator); + var packet_batch_2 = ArrayList(Packet).init(allocator); // send one which fails sanitization var value_v2 = try CrdsValue.initSigned(crds.CrdsData.randomFromIndex(rng.random(), 2), &keypair); @@ -2670,12 +2663,16 @@ test "gossip.gossip_service: init, exit, and deinit" { var handle = try std.Thread.spawn( .{}, - GossipService.run, + GossipService.runSpy, .{&gossip_service}, ); - // gossip_service.echo_server.kill(); + gossip_service.echo_server.kill(); exit.store(true, std.atomic.Ordering.Unordered); + + const buf: [10]u8 = undefined; + _ = try gossip_service.gossip_socket.sendTo(gossip_address.toEndpoint(), &buf); + handle.join(); gossip_service.deinit(); } @@ -2731,7 +2728,7 @@ pub const BenchmarkGossipServiceGeneral = struct { }); // send incomign packets/messages - var outgoing_channel = Channel(std.ArrayList(Packet)).init(allocator, 10_000); + var outgoing_channel = Channel(ArrayList(Packet)).init(allocator, 10_000); defer outgoing_channel.deinit(); var socket = UdpSocket.create(.ipv4, .udp) catch return error.SocketCreateFailed; @@ -2757,7 +2754,7 @@ pub const BenchmarkGossipServiceGeneral = struct { var msg_sent: usize = 0; while (msg_sent < num_message_iterations) { - var packet_output = try std.ArrayList(Packet).initCapacity(allocator, 10); + var packet_output = try ArrayList(Packet).initCapacity(allocator, 10); // send a ping message { @@ -2806,7 +2803,7 @@ pub const BenchmarkGossipServiceGeneral = struct { for (0..5) |_| { var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); - var packet_output = try std.ArrayList(Packet).initCapacity(allocator, 1); + var packet_output = try ArrayList(Packet).initCapacity(allocator, 1); packet_output.appendAssumeCapacity(msg); try outgoing_channel.send(packet_output); @@ -2817,86 +2814,3 @@ pub const BenchmarkGossipServiceGeneral = struct { outgoing_handle.join(); } }; - -pub const BenchmarkGossipServicePullRequest 
= struct { - pub const min_iterations = 1; - pub const max_iterations = 1; - - pub const args = [_]usize{ - 1_000, - }; - - pub const arg_names = [_][]const u8{ - "1_000", - }; - - pub fn benchmarkPullRequests(num_message_iterations: usize) !void { - const allocator = std.heap.page_allocator; - var address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); - - var keypair = try KeyPair.create(null); - var pubkey = Pubkey.fromPublicKey(&keypair.public_key, false); - var contact_info = crds.LegacyContactInfo.default(pubkey); - contact_info.shred_version = 19; - contact_info.gossip = address; - - // var logger = Logger.init(allocator, .debug); - // defer logger.deinit(); - // logger.spawn(); - var logger: Logger = .noop; - - var exit = AtomicBool.init(false); - var gossip_service = try GossipService.init( - allocator, - contact_info, - keypair, - null, - &exit, - logger, - ); - defer gossip_service.deinit(); - - // var packet_handle = try Thread.spawn(.{}, GossipService.processMessages, .{ - // &gossip_service, - // }); - - var packet_handle = try Thread.spawn(.{}, GossipService.runSpy, .{ - &gossip_service, - }); - - var rand = std.rand.DefaultPrng.init(19); - var rng = rand.random(); - - var sender_keypair = try KeyPair.create(null); - - var msg_sent: usize = 0; - for (0..num_message_iterations) |i| { - // send a push message - if (i % 2 == 0) { - var packets = try fuzz.randomPushMessage(rng, &sender_keypair, address.toEndpoint()); - defer packets.deinit(); - - for (packets.items) |packet| { - try gossip_service.packet_incoming_channel.send(packet); - msg_sent += 1; - } - } else { - // send a pull request - var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); - try gossip_service.packet_incoming_channel.send(packet); - msg_sent += 1; - } - } - - while (true) { - const v = gossip_service.messages_processed.load(std.atomic.Ordering.Unordered); - if (v == msg_sent) { - break; - } - std.time.sleep(std.time.ns_per_s); - } - - 
exit.store(true, std.atomic.Ordering.Unordered); - packet_handle.join(); - } -}; diff --git a/src/gossip/packet.zig b/src/gossip/packet.zig index 6a70a56e9..f096dcaa7 100644 --- a/src/gossip/packet.zig +++ b/src/gossip/packet.zig @@ -23,10 +23,7 @@ pub const Packet = struct { pub fn default() Self { return .{ - .addr = .{ - .port = 0, - .address = .{ .ipv4 = network.Address.IPv4.any } - }, + .addr = .{ .port = 0, .address = .{ .ipv4 = network.Address.IPv4.any } }, .data = undefined, .size = 0, }; diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index cb5d3bff2..602cec9a3 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -22,11 +22,13 @@ pub fn readSocket( const MAX_WAIT_NS = std.time.ns_per_ms; // 1ms + var packet_batch: std.ArrayList(Packet) = undefined; + while (!exit.load(std.atomic.Ordering.Unordered)) { // init a new batch var count: usize = 0; const capacity = PACKETS_PER_BATCH; - var packet_batch = try std.ArrayList(Packet).initCapacity( + packet_batch = try std.ArrayList(Packet).initCapacity( allocator, capacity, ); @@ -66,6 +68,8 @@ pub fn readSocket( } try incoming_channel.send(packet_batch); } + + packet_batch.deinit(); } pub fn recvMmsg( diff --git a/src/sync/channel.zig b/src/sync/channel.zig index 7efcf856c..4d8d1b1c8 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -27,9 +27,10 @@ pub fn Channel(comptime T: type) type { } pub fn deinit(self: *Self) void { - var buff = self.buffer.lock(); - buff.mut().deinit(); - buff.unlock(); + var buff_lock = self.buffer.lock(); + var buff: *std.ArrayList(T) = buff_lock.mut(); + buff.deinit(); + buff_lock.unlock(); self.allocator.destroy(self); } @@ -150,7 +151,7 @@ const Packet = @import("../gossip/packet.zig").Packet; fn testPacketSender(chan: *Channel(Packet), total_send: usize) void { var i: usize = 0; while (i < total_send) : (i += 1) { - var packet = Packet.default(); + var packet = Packet.default(); chan.send(packet) catch unreachable; } } From 
909ec87ac5c911add991e6a115022f09abf6d6e8 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 4 Oct 2023 14:59:22 -0400 Subject: [PATCH 22/72] fix gossip tests --- src/gossip/gossip_service.zig | 470 ++++++++-------------------------- src/gossip/socket_utils.zig | 26 +- 2 files changed, 122 insertions(+), 374 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 476f175ac..e7801691e 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -219,27 +219,27 @@ pub const GossipService = struct { self.echo_server.deinit(); self.gossip_socket.close(); - { + { var buff_lock = self.packet_incoming_channel.buffer.lock(); var buff: *std.ArrayList(PacketBatch) = buff_lock.mut(); for (buff.items) |*item| item.deinit(); buff_lock.unlock(); self.packet_incoming_channel.deinit(); } - { - var buff_lock = self.packet_outgoing_channel.buffer.lock(); - var buff: *std.ArrayList(PacketBatch) = buff_lock.mut(); - for (buff.items) |*item| item.deinit(); - buff_lock.unlock(); - self.packet_outgoing_channel.deinit(); - } - { + { var buff_lock = self.verified_incoming_channel.buffer.lock(); var buff: *std.ArrayList(ProtocolMessage) = buff_lock.mut(); for (buff.items) |*item| bincode.free(self.allocator, &item.message); buff_lock.unlock(); self.verified_incoming_channel.deinit(); } + { + var buff_lock = self.packet_outgoing_channel.buffer.lock(); + var buff: *std.ArrayList(PacketBatch) = buff_lock.mut(); + for (buff.items) |*item| item.deinit(); + buff_lock.unlock(); + self.packet_outgoing_channel.deinit(); + } self.entrypoints.deinit(); @@ -382,6 +382,11 @@ pub const GossipService = struct { verify_task_heap.* = verify_task; tasks[i] = verify_task_heap; } + defer { + for (tasks) |task| { + self.allocator.destroy(task); + } + } while (!self.exit.load(std.atomic.Ordering.Unordered)) { // var drain_timer = std.time.Timer.start() catch unreachable; @@ -469,13 +474,13 @@ pub const GossipService = struct { const 
init_capacity = socket_utils.PACKETS_PER_BATCH; var push_messages = try ArrayList(PushMessage).initCapacity(self.allocator, init_capacity); var pull_requests = try ArrayList(PullRequestMessage).initCapacity(self.allocator, init_capacity); - var pull_response_messages = try ArrayList(PullResponseMessage).initCapacity(self.allocator, init_capacity); + var pull_responses = try ArrayList(PullResponseMessage).initCapacity(self.allocator, init_capacity); var ping_messages = try ArrayList(PingMessage).initCapacity(self.allocator, init_capacity); var pong_messages = try ArrayList(PongMessage).initCapacity(self.allocator, init_capacity); var prune_messages = try ArrayList(*PruneData).initCapacity(self.allocator, init_capacity); defer { - pull_response_messages.deinit(); + pull_responses.deinit(); ping_messages.deinit(); pong_messages.deinit(); prune_messages.deinit(); @@ -514,7 +519,7 @@ pub const GossipService = struct { }); }, .PullResponse => |*pull| { - try pull_response_messages.append(PullResponseMessage{ + try pull_responses.append(PullResponseMessage{ .from_pubkey = &pull[0], .crds_values = pull[1], }); @@ -594,13 +599,13 @@ pub const GossipService = struct { } // PULL RESP - if (pull_response_messages.items.len > 0) { + if (pull_responses.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; - const length = pull_response_messages.items.len; - try self.handleBatchPullResponses(&pull_response_messages, self.logger); + const length = pull_responses.items.len; + try self.handleBatchPullResponses(&pull_responses, self.logger); const elapsed = x_timer.read(); self.logger.debugf("handle batch pull_resp took {} with {} items @{}\n", .{ elapsed, length, msg_count }); - pull_response_messages.clearRetainingCapacity(); + pull_responses.clearRetainingCapacity(); } // PING @@ -612,6 +617,7 @@ pub const GossipService = struct { // TODO: figure out a way to re-use this allocation instead of freeing after responder sends it var ping_packet_batch = try 
ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); + errdefer ping_packet_batch.deinit(); // TODO: add back logging @@ -1039,7 +1045,6 @@ pub const GossipService = struct { self: *Self, pull_requests: ArrayList(PullRequestMessage), ) void { - // self.handleBatchPullRequestSequential(pull_requests) catch {}; self.handleBatchPullRequestParallel(pull_requests) catch |err| { std.debug.print("handleBatchPullRequestParallel failed: {}\n", .{err}); }; @@ -1120,9 +1125,8 @@ pub const GossipService = struct { self: *Self, pull_requests: ArrayList(PullRequestMessage), ) !void { - // TODO: parallelize this - // update the callers + // TODO: parallelize this const now = getWallclockMs(); { var crds_table_lock = self.crds_table_rw.write(); @@ -1182,6 +1186,8 @@ pub const GossipService = struct { // send the pings if (count > 0) { try self.packet_outgoing_channel.send(ping_packets); + } else { + ping_packets.deinit(); } } @@ -1248,125 +1254,10 @@ pub const GossipService = struct { } } - fn handleBatchPullRequestSequential( - self: *Self, - pull_requests: ArrayList(PullRequestMessage), - ) !void { - for (pull_requests.items) |*pr| { - const maybe_resp_packets = try self.handlePullRequest( - pr.value, - pr.filter, - pr.from_endpoint, - null, - ); - if (maybe_resp_packets) |*resp_packets| { - for (resp_packets.items) |packet| { - try self.packet_outgoing_channel.send(packet); - } - } - } - } - - /// logic for handling a pull request message - /// values which are missing in the pull request filter are returned as a pull response - /// which are serialized into packets. 
- fn handlePullRequest( - self: *Self, - /// the crds value associated with the pull request - pull_value: CrdsValue, - /// the crds filter of the pull request - pull_filter: CrdsFilter, - /// the endpoint of the peer sending the pull request (/who to send the pull response to) - pull_from_endpoint: EndPoint, - // logging - maybe_log_entry: ?Entry, - ) error{ SerializationError, OutOfMemory, ChannelClosed }!?ArrayList(Packet) { - const now = getWallclockMs(); - - { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - std.debug.print("pull_request crds_table_insert took {}ns\n", .{elapsed}); - } - - var crds_table_lock = self.crds_table_rw.write(); - defer crds_table_lock.unlock(); - var crds_table: *CrdsTable = crds_table_lock.mut(); - - crds_table.insert(pull_value, now) catch {}; - crds_table.updateRecordTimestamp(pull_value.id(), now); - } - - // filter out valid peers and send ping messages to peers - var now_instant = std.time.Instant.now() catch @panic("time is not supported on this OS!"); - var puller_socket_addr = SocketAddr.fromEndpoint(&pull_from_endpoint); - - var ping_cache_lock = self.ping_cache_rw.write(); - var ping_cache: *PingCache = ping_cache_lock.mut(); - var result = ping_cache.check( - now_instant, - .{ pull_value.id(), puller_socket_addr }, - &self.my_keypair, - ); - ping_cache_lock.unlock(); - - // send a ping - if (result.maybe_ping) |ping| { - if (maybe_log_entry) |log_entry| { - _ = log_entry.field("pings_sent", 1); - } - var ping_buff = [_]u8{0} ** PACKET_DATA_SIZE; - var protocol_msg = Protocol{ .PingMessage = ping }; - var serialized_ping = bincode.writeToSlice(&ping_buff, protocol_msg, .{}) catch return error.SerializationError; - var packet = Packet.init(pull_from_endpoint, ping_buff, serialized_ping.len); - try self.packet_outgoing_channel.send(packet); - } - - // peer hasnt responded to a ping = dont send a pull response - if (!result.passes_ping_check) { - return null; - } - - 
const crds_values = blk: { - var crds_table_lock = self.crds_table_rw.read(); - defer crds_table_lock.unlock(); - - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - std.debug.print("pull_request filterCrdsValues took {}ns\n", .{elapsed}); - } - - break :blk try pull_response.filterCrdsValues( - self.allocator, - crds_table_lock.get(), - &pull_filter, - pull_value.wallclock(), - MAX_NUM_CRDS_VALUES_PULL_RESPONSE, - ); - }; - defer crds_values.deinit(); - - if (maybe_log_entry) |log_entry| { - _ = log_entry.field("num_crds_values_resp", crds_values.items.len); - } - - if (crds_values.items.len == 0) { - return null; - } - - // send the values as a pull response - const packets = try crdsValuesToPackets( - self.allocator, - &self.my_pubkey, - crds_values.items, - &pull_from_endpoint, - ChunkType.PullResponse, - ); - return packets; - } - + /// logic for handling a pull response message. + /// successful inserted values, have their origin value timestamps updated. + /// failed inserts (ie, too old or duplicate values) are added to the failed pull hashes so that they can be + /// included in the next pull request (so we dont receive them again). pub fn handleBatchPullResponses( self: *Self, pull_response_messages: *const ArrayList(PullResponseMessage), @@ -1379,6 +1270,8 @@ pub const GossipService = struct { const now = getWallclockMs(); var failed_insert_ptrs = ArrayList(*CrdsValue).init(self.allocator); + defer failed_insert_ptrs.deinit(); + { var crds_table_lock = self.crds_table_rw.write(); var crds_table: *CrdsTable = crds_table_lock.mut(); @@ -1439,83 +1332,9 @@ pub const GossipService = struct { } } - /// logic for handling a pull response message. - /// successful inserted values, have their origin value timestamps updated. - /// failed inserts (ie, too old or duplicate values) are added to the failed pull hashes so that they can be - /// included in the next pull request (so we dont receive them again). 
- fn handlePullResponse( - self: *Self, - /// the array of values to insert into the crds table - crds_values: []CrdsValue, - // logging info - maybe_pull_log_entry: ?Entry, - ) error{OutOfMemory}!void { - // TODO: benchmark and compare with labs' preprocessing - const now = getWallclockMs(); - var crds_table_lock = self.crds_table_rw.write(); - var crds_table: *CrdsTable = crds_table_lock.mut(); - - const insert_results = try crds_table.insertValues( - crds_values, - CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, - true, - true, - ); - - // silently insert the timeout values - // (without updating all associated origin values) - const timeout_indexs = insert_results.timeouts.?; - defer timeout_indexs.deinit(); - for (timeout_indexs.items) |index| { - crds_table.insert( - crds_values[index], - now, - ) catch {}; - } - - // update the contactInfo timestamps of the successful inserts - // (and all other origin values) - const successful_insert_indexs = insert_results.inserted.?; - defer successful_insert_indexs.deinit(); - for (successful_insert_indexs.items) |index| { - const origin = crds_values[index].id(); - crds_table.updateRecordTimestamp(origin, now); - } - crds_table_lock.unlock(); - - // track failed inserts - to use when constructing pull requests - var failed_insert_indexs = insert_results.failed.?; - defer failed_insert_indexs.deinit(); - { - var failed_pull_hashes_lock = self.failed_pull_hashes_mux.lock(); - var failed_pull_hashes: *HashTimeQueue = failed_pull_hashes_lock.mut(); - defer failed_pull_hashes_lock.unlock(); - - const failed_insert_cutoff_timestamp = now -| FAILED_INSERTS_RETENTION_MS; - try failed_pull_hashes.trim(failed_insert_cutoff_timestamp); - - var buf: [PACKET_DATA_SIZE]u8 = undefined; - for (failed_insert_indexs.items) |insert_index| { - const value = crds_values[insert_index]; - var bytes = bincode.writeToSlice(&buf, value, bincode.Params.standard) catch { - std.debug.print("handle_pull_response: failed to serialize crds value: {any}\n", 
.{value}); - continue; - }; - const value_hash = Hash.generateSha256Hash(bytes); - - try failed_pull_hashes.insert(value_hash, now); - } - } - - // update logs - if (maybe_pull_log_entry) |pull_log_entry| { - _ = pull_log_entry - .field("num_timeout_values", timeout_indexs.items.len) - .field("num_success_insert_values", successful_insert_indexs.items.len) - .field("num_failed_insert_values", failed_insert_indexs.items.len); - } - } - + /// logic for handling a prune message. verifies the prune message + /// is not too old, and that the destination pubkey is the local node, + /// then updates the active set to prune the list of origin Pubkeys. pub fn handleBatchPruneMessages( self: *Self, prune_messages: *const ArrayList(*PruneData), @@ -1536,42 +1355,6 @@ pub const GossipService = struct { } } - /// logic for handling a prune message. verifies the prune message - /// is not too old, and that the destination pubkey is the local node, - /// then updates the active set to prune the list of origin Pubkeys. 
- fn handlePruneMessage( - self: *Self, - /// the prune message to process - prune_data: *const PruneData, - ) error{ PruneMessageTooOld, BadDestination }!void { - const now = getWallclockMs(); - const prune_wallclock = prune_data.wallclock; - const too_old = prune_wallclock < now -| CRDS_GOSSIP_PRUNE_MSG_TIMEOUT_MS; - if (too_old) { - return error.PruneMessageTooOld; - } - - const bad_destination = !prune_data.destination.equals(&self.my_pubkey); - if (bad_destination) { - return error.BadDestination; - } - - // update active set - const from_pubkey = prune_data.pubkey; - - // TODO: process in batches to remove this lock - var active_set_lock = self.active_set_rw.write(); - defer active_set_lock.unlock(); - - var active_set: *ActiveSet = active_set_lock.mut(); - for (prune_data.prunes) |origin| { - if (origin.equals(&self.my_pubkey)) { - continue; - } - active_set.prune(from_pubkey, origin); - } - } - /// builds a prune message for a list of origin Pubkeys and serializes the values /// into packets to send to the prune_destination. fn buildPruneMessage( @@ -1745,66 +1528,6 @@ pub const GossipService = struct { try self.packet_outgoing_channel.send(prune_packet_batch); } - /// logic for handling push messages. crds values from the push message - /// are inserted into the crds table. the origin pubkeys of values which - /// fail the insertion are returned to generate prune messages. 
- fn handlePushMessage( - self: *Self, - push_values: []CrdsValue, - ) error{OutOfMemory}!std.AutoArrayHashMap(Pubkey, void) { - const failed_insert_indexs = blk: { - var crds_table_lock = self.crds_table_rw.write(); - defer crds_table_lock.unlock(); - - var crds_table: *CrdsTable = crds_table_lock.mut(); - var result = try crds_table.insertValues( - push_values, - CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS, - false, - false, - ); - break :blk result.failed.?; - }; - defer failed_insert_indexs.deinit(); - - // origins are used to generate prune messages - // hashmap to account for duplicates - var failed_origins = std.AutoArrayHashMap(Pubkey, void).init(self.allocator); - errdefer failed_origins.deinit(); - - if (failed_insert_indexs.items.len == 0) { - return failed_origins; - } - - for (failed_insert_indexs.items) |index| { - const origin = push_values[index].id(); - try failed_origins.put(origin, {}); - } - return failed_origins; - } - - /// builds a corresponding Pong message for a given Ping message and serializes the - /// protocol message into a Packet. - fn handlePingMessage( - self: *Self, - /// the ping message to build a Pong message for - ping: *const Ping, - /// the endpoint to send the Pong message - from_endpoint: EndPoint, - ) error{ SignatureError, SerializationError }!Packet { - const pong = try Pong.init(ping, &self.my_keypair); - const pong_message = Protocol{ - .PongMessage = pong, - }; - - // write to packet - var buf: [PACKET_DATA_SIZE]u8 = undefined; - const msg = bincode.writeToSlice(&buf, pong_message, bincode.Params.standard) catch return error.SerializationError; - const packet = Packet.init(from_endpoint, buf, msg.len); - - return packet; - } - /// removes old values from the crds table and failed pull hashes struct /// based on the current time. 
This includes triming the purged values from the /// crds table, triming the max number of pubkeys in the crds table, and removing @@ -1860,16 +1583,20 @@ pub const GossipService = struct { self: *Self, pings: ArrayList(PingAndSocketAddr), ) error{ OutOfMemory, ChannelClosed, SerializationError }!void { - var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; + const n_pings = pings.items.len; + var packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_pings); + errdefer packet_batch.deinit(); + packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_pings); - for (pings.items) |ping_and_addr| { + for (pings.items, 0..) |ping_and_addr, i| { const protocol_msg = Protocol{ .PingMessage = ping_and_addr.ping }; - var serialized_ping = bincode.writeToSlice(&packet_buf, protocol_msg, .{}) catch return error.SerializationError; - var to_endpoint = ping_and_addr.socket.toEndpoint(); - var packet = Packet.init(to_endpoint, packet_buf, serialized_ping.len); - try self.packet_outgoing_channel.send(packet); + var packet = &packet_batch.items[i]; + var serialized_ping = bincode.writeToSlice(&packet.data, protocol_msg, .{}) catch return error.SerializationError; + packet.size = serialized_ping.len; + packet.addr = ping_and_addr.socket.toEndpoint(); } + try self.packet_outgoing_channel.send(packet_batch); } /// returns a list of valid gossip nodes. 
this works by reading @@ -2076,7 +1803,11 @@ test "gossip.gossip_service: tests handle_prune_messages" { }; try prune_data.sign(&my_keypair); - try gossip_service.handlePruneMessage(&prune_data); + var data = std.ArrayList(*PruneData).init(allocator); + defer data.deinit(); + + try data.append(&prune_data); + try gossip_service.handleBatchPruneMessages(&data); var as_lock2 = gossip_service.active_set_rw.read(); var as2: *const ActiveSet = as_lock2.get(); @@ -2118,7 +1849,15 @@ test "gossip.gossip_service: tests handle_pull_response" { crds_values[i] = value; } - try gossip_service.handlePullResponse(&crds_values, null); + var data = ArrayList(GossipService.PullResponseMessage).init(allocator); + defer data.deinit(); + + try data.append(GossipService.PullResponseMessage{ + .crds_values = &crds_values, + .from_pubkey = &my_pubkey, + }); + + try gossip_service.handleBatchPullResponses(&data, logger); // make sure values are inserted var crds_table_lock = gossip_service.crds_table_rw.read(); @@ -2129,7 +1868,7 @@ test "gossip.gossip_service: tests handle_pull_response" { crds_table_lock.unlock(); // try inserting again with same values (should all fail) - try gossip_service.handlePullResponse(&crds_values, null); + try gossip_service.handleBatchPullResponses(&data, logger); var lg = gossip_service.failed_pull_hashes_mux.lock(); var failed_pull_hashes: *HashTimeQueue = lg.mut(); @@ -2195,14 +1934,17 @@ test "gossip.gossip_service: tests handle_pull_request" { var bloom = try Bloom.random(allocator, 100, 0.1, N_FILTER_BITS); defer bloom.deinit(); + var rando_keypair = try KeyPair.create([_]u8{22} ** 32); + var rando_pubkey = Pubkey.fromPublicKey(&rando_keypair.public_key, true); + var ci_data = crds.CrdsData.randomFromIndex(rng.random(), 0); - ci_data.LegacyContactInfo.id = my_pubkey; - var crds_value = try CrdsValue.initSigned(ci_data, &my_keypair); + ci_data.LegacyContactInfo.id = rando_pubkey; + var crds_value = try CrdsValue.initSigned(ci_data, &rando_keypair); const 
addr = SocketAddr.random(rng.random()); var ping_lock = gossip_service.ping_cache_rw.write(); var ping_cache: *PingCache = ping_lock.mut(); - ping_cache._setPong(my_pubkey, addr); + ping_cache._setPong(rando_pubkey, addr); ping_lock.unlock(); var filter = CrdsFilter{ @@ -2211,26 +1953,21 @@ test "gossip.gossip_service: tests handle_pull_request" { .mask_bits = N_FILTER_BITS, }; - var packets = try gossip_service.handlePullRequest( - crds_value, - filter, - addr.toEndpoint(), - null, - ); - defer packets.?.deinit(); - try std.testing.expect(packets.?.items.len > 0); - - var batch_requests = ArrayList(GossipService.PullRequestMessage).init(allocator); - defer batch_requests.deinit(); - - var from_endpoint = addr.toEndpoint(); - try batch_requests.append(GossipService.PullRequestMessage{ - .value = crds_value, + var pull_requests = ArrayList(GossipService.PullRequestMessage).init(allocator); + defer pull_requests.deinit(); + try pull_requests.append(GossipService.PullRequestMessage{ .filter = filter, - .from_endpoint = from_endpoint, + .from_endpoint = contact_info.gossip.toEndpoint(), + .value = crds_value, }); - gossip_service.handleBatchPullRequest(batch_requests); + gossip_service.handleBatchPullRequest(pull_requests); + { + var packet_lg = gossip_service.packet_outgoing_channel.buffer.lock(); + defer packet_lg.unlock(); + var outgoing_packets: *const ArrayList(PacketBatch) = packet_lg.get(); + try std.testing.expect(outgoing_packets.items.len > 0); + } } test "gossip.gossip_service: test build prune messages and handle_push_msgs" { @@ -2280,18 +2017,35 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { try lg.mut().insert(ci_value, getWallclockMs()); lg.unlock(); - var forigins = try gossip_service.handlePushMessage(values.items); - defer forigins.deinit(); - try std.testing.expect(forigins.keys().len == 0); + var msgs = ArrayList(GossipService.PushMessage).init(allocator); + defer msgs.deinit(); - var failed_origins = try 
gossip_service.handlePushMessage(values.items); - defer failed_origins.deinit(); - try std.testing.expect(failed_origins.keys().len > 0); + var endpoint = gossip_socket.toEndpoint(); + try msgs.append(GossipService.PushMessage{ + .crds_values = values.items, + .from_endpoint = &endpoint, + .from_pubkey = &push_from, + }); - var prune_packets = try gossip_service.buildPruneMessage(&failed_origins, push_from); - defer prune_packets.deinit(); + try gossip_service.handleBatchPushMessages(&msgs, logger); + { + var packet_lg = gossip_service.packet_outgoing_channel.buffer.lock(); + defer packet_lg.unlock(); + var outgoing_packets: *const ArrayList(PacketBatch) = packet_lg.get(); + // zero prune messages + try std.testing.expect(outgoing_packets.items.len == 0); + } + + try gossip_service.handleBatchPushMessages(&msgs, logger); + var packet = blk: { + var packet_lg = gossip_service.packet_outgoing_channel.buffer.lock(); + defer packet_lg.unlock(); + var outgoing_packets: *const ArrayList(PacketBatch) = packet_lg.get(); + // > 0 prune messages to account for duplicate push messages + try std.testing.expect(outgoing_packets.items.len > 0); - var packet = prune_packets.items[0]; + break :blk outgoing_packets.items[0].items[0]; + }; var protocol_message = try bincode.readFromSlice( allocator, Protocol, @@ -2300,8 +2054,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { ); defer bincode.free(allocator, protocol_message); - var msg = protocol_message.PruneMessage; - var prune_data = msg[1]; + var prune_data = protocol_message.PruneMessage[1]; try std.testing.expect(prune_data.destination.equals(&push_from)); try std.testing.expectEqual(prune_data.prunes.len, 10); } @@ -2586,9 +2339,7 @@ test "gossip.gossip_service: process contact_info push packet" { var packet_handle = try Thread.spawn( .{}, GossipService.processMessages, - .{ - &gossip_service, - }, + .{&gossip_service}, ); // send a push message @@ -2634,7 +2385,12 @@ test 
"gossip.gossip_service: process contact_info push packet" { } const resp = (try responder_channel.try_drain()).?; - defer responder_channel.allocator.free(resp); + defer { + for (resp) |*packet_batch| { + packet_batch.deinit(); + } + responder_channel.allocator.free(resp); + } try std.testing.expect(resp.len == 1); exit.store(true, std.atomic.Ordering.Unordered); @@ -2669,10 +2425,6 @@ test "gossip.gossip_service: init, exit, and deinit" { gossip_service.echo_server.kill(); exit.store(true, std.atomic.Ordering.Unordered); - - const buf: [10]u8 = undefined; - _ = try gossip_service.gossip_socket.sendTo(gossip_address.toEndpoint(), &buf); - handle.join(); gossip_service.deinit(); } diff --git a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index 602cec9a3..e163eb0aa 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -22,35 +22,30 @@ pub fn readSocket( const MAX_WAIT_NS = std.time.ns_per_ms; // 1ms - var packet_batch: std.ArrayList(Packet) = undefined; - while (!exit.load(std.atomic.Ordering.Unordered)) { // init a new batch var count: usize = 0; const capacity = PACKETS_PER_BATCH; - packet_batch = try std.ArrayList(Packet).initCapacity( + var packet_batch = try std.ArrayList(Packet).initCapacity( allocator, capacity, ); - for (0..capacity) |_| { - packet_batch.appendAssumeCapacity(Packet.default()); - } + packet_batch.appendNTimesAssumeCapacity(Packet.default(), capacity); - // set socket to block - try socket.setReadTimeout(null); + // NOTE: usually this would be null (ie, blocking) + // but in order to exit cleanly in tests - we set to 1 second + try socket.setReadTimeout(std.time.ms_per_s); var timer = std.time.Timer.start() catch unreachable; // recv packets into batch while (true) { - var n_packets_read = recvMmsg(socket, packet_batch.items[count..capacity]) catch |err| { + var n_packets_read = recvMmsg(socket, packet_batch.items[count..capacity], exit) catch |err| { if (count > 0 and err == error.WouldBlock) { if 
(timer.read() > MAX_WAIT_NS) { break; } - continue; - } else { - return err; } + continue; }; if (count == 0) { @@ -68,14 +63,14 @@ pub fn readSocket( } try incoming_channel.send(packet_batch); } - - packet_batch.deinit(); + std.debug.print("recv_socket loop closed.\n", .{}); } pub fn recvMmsg( socket: *UdpSocket, /// pre-allocated array of packets to fill up packet_batch: []Packet, + exit: *const std.atomic.Atomic(bool), ) !usize { const max_size = packet_batch.len; var count: usize = 0; @@ -87,7 +82,8 @@ pub fn recvMmsg( if (count > 0 and err == error.WouldBlock) { break; } else { - return err; + if (exit.load(std.atomic.Ordering.Unordered)) return 0; + continue; } }; From 2529537c01cd14dfe3a80c57691ef874ba355158 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 4 Oct 2023 15:45:10 -0400 Subject: [PATCH 23/72] fix build msgs loop --- src/gossip/gossip_service.zig | 90 ++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index e7801691e..57f58a274 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -389,14 +389,8 @@ pub const GossipService = struct { } while (!self.exit.load(std.atomic.Ordering.Unordered)) { - // var drain_timer = std.time.Timer.start() catch unreachable; const maybe_packets = try self.packet_incoming_channel.try_drain(); - // const drain_elapsed = drain_timer.read(); - // self.logger.debugf("handle batch packet_drain took {} with {} items\n", .{ drain_elapsed, 1 }); - if (maybe_packets == null) { - // // sleep for 1ms - // std.time.sleep(std.time.ns_per_ms * 1); continue; } @@ -492,7 +486,6 @@ pub const GossipService = struct { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { - // // sleep for 1ms // std.time.sleep(std.time.ns_per_ms * 1); continue; } @@ -718,12 +711,7 @@ pub const GossipService = struct { 
self.logger.debugf("failed to generate pull requests: {any}", .{e}); break :pull_blk; }; - defer pull_packets.deinit(); - - // send packets - for (pull_packets.items) |packet| { - try self.packet_outgoing_channel.send(packet); - } + try self.packet_outgoing_channel.send(pull_packets); } // every other loop should_send_pull_requests = !should_send_pull_requests; @@ -735,9 +723,8 @@ pub const GossipService = struct { break :blk null; }; if (maybe_push_packets) |push_packets| { - defer push_packets.deinit(); - for (push_packets.items) |packet| { - try self.packet_outgoing_channel.send(packet); + for (push_packets.items) |packet_batch| { + try self.packet_outgoing_channel.send(packet_batch); } } @@ -809,7 +796,7 @@ pub const GossipService = struct { /// logic for building new push messages which are sent to peers from the /// active set and serialized into packets. - fn buildPushMessages(self: *Self, push_cursor: *u64) !?ArrayList(Packet) { + fn buildPushMessages(self: *Self, push_cursor: *u64) !?ArrayList(ArrayList(Packet)) { // TODO: find a better static value? 
var buf: [512]crds.CrdsVersionedValue = undefined; @@ -893,8 +880,8 @@ pub const GossipService = struct { const num_values_not_considered = crds_entries.len - num_values_considered; push_cursor.* -= num_values_not_considered; - var packets = ArrayList(Packet).init(self.allocator); - errdefer packets.deinit(); + var packet_batch = ArrayList(ArrayList(Packet)).init(self.allocator); + errdefer packet_batch.deinit(); var push_iter = push_messages.iterator(); while (push_iter.next()) |push_entry| { @@ -910,12 +897,10 @@ pub const GossipService = struct { ChunkType.PushMessage, ); if (maybe_endpoint_packets) |endpoint_packets| { - defer endpoint_packets.deinit(); - try packets.appendSlice(endpoint_packets.items); + try packet_batch.append(endpoint_packets); } } - - return packets; + return packet_batch; } /// builds new pull request messages and serializes it into a list of Packets @@ -1003,8 +988,13 @@ pub const GossipService = struct { defer pull_request.deinitCrdsFilters(&filters); // build packet responses - var output = try ArrayList(Packet).initCapacity(self.allocator, filters.items.len); - var packet_buf: [PACKET_DATA_SIZE]u8 = undefined; + var n_packets: usize = 0; + if (num_peers != 0) n_packets += filters.items.len; + if (should_send_to_entrypoint) n_packets += filters.items.len; + + var packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_packets); + packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_packets); + var packet_index: usize = 0; // update wallclock and sign self.my_contact_info.wallclock = now; @@ -1021,9 +1011,11 @@ pub const GossipService = struct { const protocol_msg = Protocol{ .PullRequest = .{ filter_i, my_contact_info_value } }; - var msg_slice = try bincode.writeToSlice(&packet_buf, protocol_msg, bincode.Params{}); - var packet = Packet.init(peer_addr, packet_buf, msg_slice.len); - output.appendAssumeCapacity(packet); + var packet = &packet_batch.items[packet_index]; + var bytes = try 
bincode.writeToSlice(&packet.data, protocol_msg, bincode.Params{}); + packet.size = bytes.len; + packet.addr = peer_addr; + packet_index += 1; } } @@ -1032,13 +1024,16 @@ pub const GossipService = struct { const entrypoint_addr = self.entrypoints.items[@as(usize, @intCast(entrypoint_index))]; for (filters.items) |filter| { const protocol_msg = Protocol{ .PullRequest = .{ filter, my_contact_info_value } }; - var msg_slice = try bincode.writeToSlice(&packet_buf, protocol_msg, bincode.Params{}); - var packet = Packet.init(entrypoint_addr.toEndpoint(), packet_buf, msg_slice.len); - try output.append(packet); + + var packet = &packet_batch.items[packet_index]; + var bytes = try bincode.writeToSlice(&packet.data, protocol_msg, bincode.Params{}); + packet.size = bytes.len; + packet.addr = entrypoint_addr.toEndpoint(); + packet_index += 1; } } - return output; + return packet_batch; } fn handleBatchPullRequest( @@ -1743,6 +1738,36 @@ pub fn chunkValuesIntoPacketIndexs( return packet_indexs; } +test "gossip.gossip_service: build messages startup and shutdown" { + const allocator = std.testing.allocator; + var exit = AtomicBool.init(false); + var my_keypair = try KeyPair.create([_]u8{1} ** 32); + var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); + + var contact_info = crds.LegacyContactInfo.default(my_pubkey); + contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); + + var logger = Logger.init(std.testing.allocator, .debug); + defer logger.deinit(); + logger.spawn(); + + var gossip_service = try GossipService.init( + allocator, + contact_info, + my_keypair, + null, + &exit, + logger, + ); + defer gossip_service.deinit(); + + var build_messages_handle = try Thread.spawn(.{}, GossipService.buildMessages, .{&gossip_service}); + std.time.sleep(std.time.ns_per_s * 3); + + exit.store(true, std.atomic.Ordering.Unordered); + build_messages_handle.join(); +} + test "gossip.gossip_service: tests handle_prune_messages" { var rng = 
std.rand.DefaultPrng.init(91); @@ -2168,6 +2193,7 @@ test "gossip.gossip_service: test build_push_messages" { var msgs = (try gossip_service.buildPushMessages(&cursor)).?; try std.testing.expectEqual(cursor, 11); try std.testing.expect(msgs.items.len > 0); + for (msgs.items) |*msg| msg.deinit(); msgs.deinit(); var msgs2 = try gossip_service.buildPushMessages(&cursor); From ecf5b8103c7943f1a768d7c503b52f40381b2685 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 4 Oct 2023 16:10:53 -0400 Subject: [PATCH 24/72] fix up benchmarking code --- src/gossip/gossip_service.zig | 33 +++++++++++++-------------------- src/gossip/socket_utils.zig | 7 ++++--- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 57f58a274..c60ce936b 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -170,7 +170,6 @@ pub const GossipService = struct { var failed_pull_hashes = HashTimeQueue.init(allocator); var push_msg_q = ArrayList(CrdsValue).init(allocator); - // TODO: figure out how to properly shut this guy down on exit var echo_server = echo.Server.init(allocator, my_contact_info.gossip.port(), logger, exit); return Self{ @@ -275,6 +274,7 @@ pub const GossipService = struct { &self.gossip_socket, self.packet_incoming_channel, self.exit, + self.logger, }); defer self.joinAndExit(&receiver_handle); @@ -306,6 +306,7 @@ pub const GossipService = struct { &self.gossip_socket, self.packet_incoming_channel, self.exit, + self.logger, }); defer self.joinAndExit(&receiver_handle); @@ -2464,13 +2465,13 @@ pub const BenchmarkGossipServiceGeneral = struct { pub const args = [_]usize{ 1_000, 5_000, - // 10_000, + 10_000, }; pub const arg_names = [_][]const u8{ "1k_msgs", "5k_msgs", - // "10k_msg_iters", + "10k_msg_iters", }; pub fn benchmarkGossipServiceProcessMessages(num_message_iterations: usize) !void { @@ -2499,6 +2500,7 @@ pub const BenchmarkGossipServiceGeneral = struct { 
&exit, logger, ); + gossip_service.echo_server.kill(); // we dont need this rn defer gossip_service.deinit(); var packet_handle = try Thread.spawn(.{}, GossipService.runSpy, .{ @@ -2532,18 +2534,18 @@ pub const BenchmarkGossipServiceGeneral = struct { var msg_sent: usize = 0; while (msg_sent < num_message_iterations) { - var packet_output = try ArrayList(Packet).initCapacity(allocator, 10); + var packet_batch = try ArrayList(Packet).initCapacity(allocator, 10); // send a ping message { - var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); - try packet_output.append(msg); + var packet = try fuzz.randomPingPacket(rng, &keypair, endpoint); + try packet_batch.append(packet); msg_sent += 1; } // send a pong message { - var msg = try fuzz.randomPongPacket(rng, &keypair, endpoint); - try packet_output.append(msg); + var packet = try fuzz.randomPongPacket(rng, &keypair, endpoint); + try packet_batch.append(packet); msg_sent += 1; } // send a push message @@ -2560,12 +2562,12 @@ pub const BenchmarkGossipServiceGeneral = struct { } // send a pull request { - var msg = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); - try packet_output.append(msg); + var packet = try fuzz.randomPullRequest(allocator, rng, &sender_keypair, address.toEndpoint()); + try packet_batch.append(packet); msg_sent += 1; } - try outgoing_channel.send(packet_output); + try outgoing_channel.send(packet_batch); } // wait for all messages to be processed @@ -2577,15 +2579,6 @@ pub const BenchmarkGossipServiceGeneral = struct { } exit.store(true, std.atomic.Ordering.Unordered); - // send a few more to make sure the socket exits - for (0..5) |_| { - var msg = try fuzz.randomPingPacket(rng, &keypair, endpoint); - - var packet_output = try ArrayList(Packet).initCapacity(allocator, 1); - packet_output.appendAssumeCapacity(msg); - - try outgoing_channel.send(packet_output); - } packet_handle.join(); sender_exit.store(true, std.atomic.Ordering.Unordered); diff --git 
a/src/gossip/socket_utils.zig b/src/gossip/socket_utils.zig index e163eb0aa..ae7c30566 100644 --- a/src/gossip/socket_utils.zig +++ b/src/gossip/socket_utils.zig @@ -13,6 +13,7 @@ pub fn readSocket( socket: *UdpSocket, incoming_channel: *Channel(std.ArrayList(Packet)), exit: *const std.atomic.Atomic(bool), + logger: Logger, ) !void { //Performance out of the IO without poll // * block on the socket until it's readable @@ -63,7 +64,7 @@ pub fn readSocket( } try incoming_channel.send(packet_batch); } - std.debug.print("recv_socket loop closed.\n", .{}); + logger.debugf("readSocket loop closed\n", .{}); } pub fn recvMmsg( @@ -138,7 +139,7 @@ pub fn sendSocket( } } } - logger.debugf("send_socket loop closed\n", .{}); + logger.debugf("sendSocket loop closed\n", .{}); } pub const BenchmarkPacketProcessing = struct { @@ -167,7 +168,7 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Atomic(bool).init(false); - var handle = try std.Thread.spawn(.{}, readSocket, .{ allocator, &socket, channel, &exit }); + var handle = try std.Thread.spawn(.{}, readSocket, .{ allocator, &socket, channel, &exit, .noop }); var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, n_packets }); var rand = std.rand.DefaultPrng.init(0); From aea5ccba4eb533b34607288b34d82e8d0793ddbd Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Thu, 5 Oct 2023 17:59:06 -0400 Subject: [PATCH 25/72] add shred version filtering to push messages --- src/gossip/crds_table.zig | 10 +++ src/gossip/gossip_service.zig | 120 ++++++++++++++++++++++++++-------- 2 files changed, 104 insertions(+), 26 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 1464764ba..fd71a0100 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -422,6 +422,16 @@ pub const CrdsTable = struct { return indexs; } + // ** helper functions ** + pub fn check_matching_shred_version(self: *const Self, pubkey: Pubkey, expected_shred_version: 
u16) bool { + if (self.shred_versions.get(pubkey)) |pubkey_shred_version| { + if (pubkey_shred_version == expected_shred_version) { + return true; + } + } + return false; + } + // ** triming values in the crdstable ** pub fn remove(self: *Self, label: CrdsValueLabel) error{ LabelNotFound, OutOfMemory }!void { const now = crds.getWallclockMs(); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index c60ce936b..f59a83ba9 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -102,7 +102,7 @@ pub const GossipService = struct { my_contact_info: crds.LegacyContactInfo, my_keypair: KeyPair, my_pubkey: Pubkey, - my_shred_version: u64, + my_shred_version: u16, exit: *AtomicBool, // communication between threads @@ -147,7 +147,7 @@ pub const GossipService = struct { } var thread_pool = try allocator.create(ThreadPool); - var n_threads = @max(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 8); + var n_threads = @min(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 8); thread_pool.* = ThreadPool.init(.{ .max_threads = n_threads, .stack_size = 2 * 1024 * 1024, @@ -264,7 +264,8 @@ pub const GossipService = struct { /// 2) packet verifier /// 3) packet processor /// 4) build message loop (to send outgoing message) - /// and 5) a socket responder (to send outgoing packets) + /// 5) a socket responder (to send outgoing packets) + /// 6) echo server pub fn run(self: *Self) !void { var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); defer self.joinAndExit(&ip_echo_server_listener_handle); @@ -373,14 +374,13 @@ pub const GossipService = struct { var tasks: [socket_utils.PACKETS_PER_BATCH]*VerifyMessageTask = undefined; // pre-allocate all the tasks for (0..tasks.len) |i| { - const verify_task = VerifyMessageTask{ + var verify_task_heap = try self.allocator.create(VerifyMessageTask); + verify_task_heap.* = VerifyMessageTask{ .task = .{ .callback = 
VerifyMessageTask.callback }, .allocator = self.allocator, .verified_incoming_channel = self.verified_incoming_channel, .packet = &Packet.default(), }; - var verify_task_heap = try self.allocator.create(VerifyMessageTask); - verify_task_heap.* = verify_task; tasks[i] = verify_task_heap; } defer { @@ -447,8 +447,8 @@ pub const GossipService = struct { pub const PushMessage = struct { crds_values: []CrdsValue, - from_pubkey: *Pubkey, - from_endpoint: *EndPoint, + from_pubkey: *const Pubkey, + from_endpoint: *const EndPoint, }; pub const PullResponseMessage = struct { @@ -456,6 +456,20 @@ pub const GossipService = struct { from_pubkey: *Pubkey, }; + pub const ShredVersionTask = struct { + protocol_message: *const ProtocolMessage, + allocator: std.mem.Allocator, + + task: Task, + done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + is_valid: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), + + pub fn callback(task: *Task) void { + var self = @fieldParentPtr(@This(), "task", task); + defer self.done.store(true, std.atomic.Ordering.Release); + } + }; + /// main logic for recieving and processing `Protocol` messages. pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; @@ -519,9 +533,26 @@ pub const GossipService = struct { }); }, .PullRequest => |*pull| { + const value: CrdsValue = pull[1]; + switch (value.data) { + .LegacyContactInfo => |*data| { + if (data.id.equals(&self.my_pubkey)) { + // talking to myself == ignore + continue; + } + // Allow spy nodes with shred-verion == 0 to pull from other nodes. 
+ if (data.shred_version != 0 and data.shred_version != self.my_shred_version) { + // non-matching shred version + continue; + } + }, + // only contact info supported + else => continue, + } + try pull_requests.append(.{ .filter = pull[0], - .value = pull[1], + .value = value, .from_endpoint = from_endpoint, }); }, @@ -724,7 +755,7 @@ pub const GossipService = struct { break :blk null; }; if (maybe_push_packets) |push_packets| { - for (push_packets.items) |packet_batch| { + for (push_packets.items) |packet_batch| { try self.packet_outgoing_channel.send(packet_batch); } } @@ -989,7 +1020,7 @@ pub const GossipService = struct { defer pull_request.deinitCrdsFilters(&filters); // build packet responses - var n_packets: usize = 0; + var n_packets: usize = 0; if (num_peers != 0) n_packets += filters.items.len; if (should_send_to_entrypoint) n_packets += filters.items.len; @@ -1016,7 +1047,7 @@ pub const GossipService = struct { var bytes = try bincode.writeToSlice(&packet.data, protocol_msg, bincode.Params{}); packet.size = bytes.len; packet.addr = peer_addr; - packet_index += 1; + packet_index += 1; } } @@ -1067,17 +1098,6 @@ pub const GossipService = struct { var self = @fieldParentPtr(@This(), "task", task); defer self.done.store(true, std.atomic.Ordering.Release); - switch (self.value.data) { - .LegacyContactInfo => |*info| { - if (info.id.equals(self.my_pubkey)) { - // talking to myself == ignore - return; - } - }, - // only contact info supported - else => return, - } - const output_limit = self.output_limit.load(std.atomic.Ordering.Unordered); if (output_limit <= 0) { return; @@ -1440,8 +1460,14 @@ pub const GossipService = struct { for (batch_push_messages.items) |*push_message| { var crds_table: *CrdsTable = crds_table_lock.mut(); - var result = try crds_table.insertValues( + const valid_len = self.filterCrdsValuesBasedOnShredVersion( + crds_table, push_message.crds_values, + push_message.from_pubkey.*, + ); + + var result = try crds_table.insertValues( + 
push_message.crds_values[0..valid_len], CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS, false, false, @@ -1658,6 +1684,48 @@ pub const GossipService = struct { return nodes[0..node_index]; } + + pub fn filterCrdsValuesBasedOnShredVersion( + self: *Self, + crds_table: *const CrdsTable, + crds_values: []CrdsValue, + from_pubkey: Pubkey, + ) usize { + // we use swap remove which just reorders the array + // (order dm), so we just track the new len -- ie, no allocations/frees + var crds_values_array = ArrayList(CrdsValue).fromOwnedSlice(self.allocator, crds_values); + if (crds_table.check_matching_shred_version(from_pubkey, self.my_shred_version)) { + for (crds_values, 0..) |*crds_value, i| { + switch (crds_value.data) { + // always allow contact info + node instance to update shred versions + .LegacyContactInfo => {}, + .NodeInstance => {}, + else => { + // only allow other values with matching shred versions + if (!crds_table.check_matching_shred_version( + crds_value.id(), + self.my_shred_version, + )) { + _ = crds_values_array.swapRemove(i); + } + }, + } + } + } else { + for (crds_values, 0..) 
|*crds_value, i| { + switch (crds_value.data) { + // always allow contact info + node instance to update shred versions + .LegacyContactInfo => {}, + .NodeInstance => {}, + else => { + // dont update any other values + _ = crds_values_array.swapRemove(i); + }, + } + } + } + return crds_values_array.items.len; + } }; pub const ChunkType = enum(u8) { @@ -1739,7 +1807,7 @@ pub fn chunkValuesIntoPacketIndexs( return packet_indexs; } -test "gossip.gossip_service: build messages startup and shutdown" { +test "gossip.gossip_service: build messages startup and shutdown" { const allocator = std.testing.allocator; var exit = AtomicBool.init(false); var my_keypair = try KeyPair.create([_]u8{1} ** 32); @@ -2500,7 +2568,7 @@ pub const BenchmarkGossipServiceGeneral = struct { &exit, logger, ); - gossip_service.echo_server.kill(); // we dont need this rn + gossip_service.echo_server.kill(); // we dont need this rn defer gossip_service.deinit(); var packet_handle = try Thread.spawn(.{}, GossipService.runSpy, .{ From 6c761634bb4d7507bd70f0a3008a22deaeb9c503 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Thu, 5 Oct 2023 18:06:20 -0400 Subject: [PATCH 26/72] add shred version filtering on pull responses too; --- src/gossip/gossip_service.zig | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index f59a83ba9..04e622f38 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -613,7 +613,9 @@ pub const GossipService = struct { if (pull_requests.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_requests.items.len; - self.handleBatchPullRequest(pull_requests); + self.handleBatchPullRequest(pull_requests) catch |err| { + std.debug.print("handleBatchPullRequest failed: {}\n", .{err}); + }; const elapsed = x_timer.read(); self.logger.debugf("handle batch pull_req took {} with {} items @{}\n", 
.{ elapsed, length, msg_count }); @@ -627,7 +629,9 @@ pub const GossipService = struct { if (pull_responses.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_responses.items.len; - try self.handleBatchPullResponses(&pull_responses, self.logger); + self.handleBatchPullResponses(&pull_responses, self.logger) catch |err| { + std.debug.print("handleBatchPullResponses failed: {}\n", .{err}); + }; const elapsed = x_timer.read(); self.logger.debugf("handle batch pull_resp took {} with {} items @{}\n", .{ elapsed, length, msg_count }); pull_responses.clearRetainingCapacity(); @@ -1068,15 +1072,6 @@ pub const GossipService = struct { return packet_batch; } - fn handleBatchPullRequest( - self: *Self, - pull_requests: ArrayList(PullRequestMessage), - ) void { - self.handleBatchPullRequestParallel(pull_requests) catch |err| { - std.debug.print("handleBatchPullRequestParallel failed: {}\n", .{err}); - }; - } - const PullRequestTask = struct { allocator: std.mem.Allocator, my_pubkey: *const Pubkey, @@ -1137,7 +1132,7 @@ pub const GossipService = struct { } }; - fn handleBatchPullRequestParallel( + fn handleBatchPullRequest( self: *Self, pull_requests: ArrayList(PullRequestMessage), ) !void { @@ -1294,10 +1289,14 @@ pub const GossipService = struct { defer crds_table_lock.unlock(); for (pull_response_messages.items) |*pull_message| { - const crds_values = pull_message.crds_values; + const valid_len = self.filterCrdsValuesBasedOnShredVersion( + crds_table, + pull_message.crds_values, + pull_message.from_pubkey.*, + ); const insert_results = try crds_table.insertValues( - crds_values, + pull_message.crds_values[0..valid_len], CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, true, true, @@ -1309,7 +1308,7 @@ pub const GossipService = struct { defer timeout_indexs.deinit(); for (timeout_indexs.items) |index| { crds_table.insert( - crds_values[index], + pull_message.crds_values[index], now, ) catch {}; } @@ -1319,7 +1318,7 @@ pub const GossipService = 
struct { const successful_insert_indexs = insert_results.inserted.?; defer successful_insert_indexs.deinit(); for (successful_insert_indexs.items) |index| { - const origin = crds_values[index].id(); + const origin = pull_message.crds_values[index].id(); crds_table.updateRecordTimestamp(origin, now); } crds_table.updateRecordTimestamp(pull_message.from_pubkey.*, now); @@ -1327,7 +1326,7 @@ pub const GossipService = struct { var failed_insert_indexs = insert_results.failed.?; defer failed_insert_indexs.deinit(); for (failed_insert_indexs.items) |index| { - try failed_insert_ptrs.append(&crds_values[index]); + try failed_insert_ptrs.append(&pull_message.crds_values[index]); } } } @@ -2055,7 +2054,7 @@ test "gossip.gossip_service: tests handle_pull_request" { .value = crds_value, }); - gossip_service.handleBatchPullRequest(pull_requests); + try gossip_service.handleBatchPullRequest(pull_requests); { var packet_lg = gossip_service.packet_outgoing_channel.buffer.lock(); defer packet_lg.unlock(); From db2190dd7dfd5db184acfa355542213660285d11 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Thu, 5 Oct 2023 18:09:17 -0400 Subject: [PATCH 27/72] update --- src/gossip/gossip_service.zig | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 04e622f38..0401ecf65 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -513,8 +513,6 @@ pub const GossipService = struct { defer self.verified_incoming_channel.allocator.free(protocol_messages); msg_count += protocol_messages.len; - // TODO: filter messages based on_shred_version - for (protocol_messages) |*protocol_message| { var from_endpoint: EndPoint = protocol_message.from_endpoint; @@ -1137,7 +1135,7 @@ pub const GossipService = struct { pull_requests: ArrayList(PullRequestMessage), ) !void { // update the callers - // TODO: parallelize this + // TODO: parallelize this? 
const now = getWallclockMs(); { var crds_table_lock = self.crds_table_rw.write(); @@ -1225,23 +1223,20 @@ pub const GossipService = struct { for (valid_indexs.items) |i| { // TODO: pre-allocate these tasks + // create the thread task - var output = ArrayList(Packet).init(self.allocator); - var task = PullRequestTask{ + var task_heap = try self.allocator.create(PullRequestTask); + task_heap.* = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, .my_pubkey = &self.my_pubkey, .from_endpoint = &pull_requests.items[i].from_endpoint, .filter = &pull_requests.items[i].filter, .value = &pull_requests.items[i].value, .crds_table = crds_table, - .output = output, + .output = ArrayList(Packet).init(self.allocator), .allocator = self.allocator, .output_limit = &output_limit, }; - - // alloc on heap - var task_heap = try self.allocator.create(PullRequestTask); - task_heap.* = task; tasks.appendAssumeCapacity(task_heap); // run it @@ -1510,13 +1505,13 @@ pub const GossipService = struct { } // build prune packets - // TODO: figure out a way to re-use this allocation const now = getWallclockMs(); var pubkey_to_failed_origins_iter = pubkey_to_failed_origins.iterator(); var n_packets = pubkey_to_failed_origins_iter.len; if (n_packets == 0) return; + // TODO: figure out a way to re-use this allocation var prune_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_packets); prune_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_packets); var count: usize = 0; From 94802001f41fff1966750fd6057729b8ad45f835 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Sun, 8 Oct 2023 16:57:52 -0400 Subject: [PATCH 28/72] fix mem leaks --- src/common/lru.zig | 7 +++++-- src/gossip/gossip_service.zig | 23 +++++++++-------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index c01f32090..b86700abb 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -65,6 +65,7 @@ pub fn 
LruCache(comptime K: type, comptime V: type) type { while (self.dbl_link_list.pop()) |node| { self.deinitNode(node); } + std.debug.assert(self.len == 0); // no leaks self.hashmap.deinit(); } @@ -156,8 +157,10 @@ pub fn LruCache(comptime K: type, comptime V: type) type { pub fn pop(self: *Self, k: K) ?V { if (self.hashmap.fetchSwapRemove(k)) |kv| { - self.dbl_link_list.remove(kv.value); - return kv.value.data.value; + var node = kv.value; + self.dbl_link_list.remove(node); + self.deinitNode(node); + return node.data.value; } return null; } diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 0401ecf65..ae92e9b21 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -225,13 +225,7 @@ pub const GossipService = struct { buff_lock.unlock(); self.packet_incoming_channel.deinit(); } - { - var buff_lock = self.verified_incoming_channel.buffer.lock(); - var buff: *std.ArrayList(ProtocolMessage) = buff_lock.mut(); - for (buff.items) |*item| bincode.free(self.allocator, &item.message); - buff_lock.unlock(); - self.verified_incoming_channel.deinit(); - } + self.verified_incoming_channel.deinit(); { var buff_lock = self.packet_outgoing_channel.buffer.lock(); var buff: *std.ArrayList(PacketBatch) = buff_lock.mut(); @@ -360,7 +354,6 @@ pub const GossipService = struct { const msg = ProtocolMessage{ .from_endpoint = self.packet.addr, - // TODO: remove self copy (its on the heap - should just need a ptr) .message = protocol_message, }; self.verified_incoming_channel.send(msg) catch unreachable; @@ -510,7 +503,13 @@ pub const GossipService = struct { } const protocol_messages = maybe_protocol_messages.?; - defer self.verified_incoming_channel.allocator.free(protocol_messages); + defer { + for (protocol_messages) |*msg| { + bincode.free(self.allocator, msg.message); + } + self.verified_incoming_channel.allocator.free(protocol_messages); + } + msg_count += protocol_messages.len; for (protocol_messages) |*protocol_message| { @@ 
-616,10 +615,6 @@ pub const GossipService = struct { }; const elapsed = x_timer.read(); self.logger.debugf("handle batch pull_req took {} with {} items @{}\n", .{ elapsed, length, msg_count }); - - for (pull_requests.items) |*pr| { - pr.filter.deinit(); - } pull_requests.clearRetainingCapacity(); } @@ -1166,8 +1161,8 @@ pub const GossipService = struct { // filter out valid peers and send ping messages to peers var now_instant = std.time.Instant.now() catch @panic("time is not supported on this OS!"); var puller_socket_addr = SocketAddr.fromEndpoint(&req.from_endpoint); - const caller = req.value.id(); + var result = ping_cache.check( now_instant, .{ caller, puller_socket_addr }, From 373758b50235681ee68166b892d9b1acc88d75d8 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Mon, 9 Oct 2023 14:56:07 -0400 Subject: [PATCH 29/72] update --- src/bincode/bincode.zig | 1 - src/gossip/gossip_service.zig | 31 ++++--------------------------- src/gossip/packet.zig | 8 -------- 3 files changed, 4 insertions(+), 36 deletions(-) diff --git a/src/bincode/bincode.zig b/src/bincode/bincode.zig index 94077aca8..28125c5ff 100644 --- a/src/bincode/bincode.zig +++ b/src/bincode/bincode.zig @@ -217,7 +217,6 @@ pub fn Deserializer(comptime Reader: type) type { inline for (info.fields) |field| { if (getFieldConfig(T, field)) |config| { if (config.free) |free_fcn| { - // std.debug.print("found free fcn...\n", .{}); var field_value = @field(value, field.name); switch (@typeInfo(field.type)) { .Pointer => |*field_info| { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index ae92e9b21..62192a543 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -449,20 +449,6 @@ pub const GossipService = struct { from_pubkey: *Pubkey, }; - pub const ShredVersionTask = struct { - protocol_message: *const ProtocolMessage, - allocator: std.mem.Allocator, - - task: Task, - done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), 
- is_valid: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), - - pub fn callback(task: *Task) void { - var self = @fieldParentPtr(@This(), "task", task); - defer self.done.store(true, std.atomic.Ordering.Release); - } - }; - /// main logic for recieving and processing `Protocol` messages. pub fn processMessages(self: *Self) !void { var timer = std.time.Timer.start() catch unreachable; @@ -687,31 +673,22 @@ pub const GossipService = struct { // TRIM crds-table { - var x_timer = std.time.Timer.start() catch unreachable; - defer { - const elapsed = x_timer.read(); - self.logger.debugf("handle batch crds_trim took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); - } - var crds_table_lock = self.crds_table_rw.write(); defer crds_table_lock.unlock(); - var crds_table: *CrdsTable = crds_table_lock.mut(); + + var x_timer = std.time.Timer.start() catch unreachable; crds_table.attemptTrim(CRDS_UNIQUE_PUBKEY_CAPACITY) catch |err| { self.logger.warnf("error trimming crds table: {s}", .{@errorName(err)}); }; + const elapsed = x_timer.read(); + self.logger.debugf("handle batch crds_trim took {} with {} items @{}\n", .{ elapsed, 1, msg_count }); } const elapsed = timer.read(); self.logger.debugf("{} messages processed in {}ns\n", .{ msg_count, elapsed }); // std.debug.print("{} messages processed in {}ns\n", .{ msg_count, elapsed }); self.messages_processed.store(msg_count, std.atomic.Ordering.Release); - // if (msg_count >= 30_000) { - // // if (msg_count >= 1_000) { - // std.debug.print("exiting...\n", .{}); - // self.exit.store(true, std.atomic.Ordering.Unordered); - // break; - // } } self.logger.debugf("process_messages loop closed\n", .{}); diff --git a/src/gossip/packet.zig b/src/gossip/packet.zig index c58c538f1..f096dcaa7 100644 --- a/src/gossip/packet.zig +++ b/src/gossip/packet.zig @@ -28,12 +28,4 @@ pub const Packet = struct { .size = 0, }; } - - pub fn default() Self { - return .{ - .addr = .{ .port = 0, .address = .{ .ipv4 = 
network.Address.IPv4.any } }, - .data = undefined, - .size = 0, - }; - } }; From a60d39093c0d2fdc7a7afdc2fb4d98065b172ea2 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Mon, 9 Oct 2023 15:08:35 -0400 Subject: [PATCH 30/72] fix test --- src/gossip/gossip_service.zig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 62192a543..36fbd9b0a 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -491,6 +491,9 @@ pub const GossipService = struct { const protocol_messages = maybe_protocol_messages.?; defer { for (protocol_messages) |*msg| { + const msg_type = @intFromEnum(msg.message); + std.debug.print("msg_type: {}\n", .{msg_type}); + bincode.free(self.allocator, msg.message); } self.verified_incoming_channel.allocator.free(protocol_messages); @@ -2412,9 +2415,10 @@ test "gossip.gossip_service: process contact_info push packet" { .LegacyContactInfo = legacy_contact_info, }; var crds_value = try crds.CrdsValue.initSigned(crds_data, &kp); - var values = [_]crds.CrdsValue{crds_value}; + var heap_values = try allocator.alloc(crds.CrdsValue, 1); + heap_values[0] = crds_value; const msg = Protocol{ - .PushMessage = .{ id, &values }, + .PushMessage = .{ id, heap_values }, }; // packet From 899a91091b6cbcd81cf8402ef6c618a9c0e3c6a6 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 10 Oct 2023 12:25:37 -0400 Subject: [PATCH 31/72] filter out msgs with unspecified addr or port == 0 --- src/gossip/gossip_service.zig | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 36fbd9b0a..377591592 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -491,9 +491,6 @@ pub const GossipService = struct { const protocol_messages = maybe_protocol_messages.?; defer { for (protocol_messages) |*msg| { - const msg_type = 
@intFromEnum(msg.message); - std.debug.print("msg_type: {}\n", .{msg_type}); - bincode.free(self.allocator, msg.message); } self.verified_incoming_channel.allocator.free(protocol_messages); @@ -536,6 +533,12 @@ pub const GossipService = struct { else => continue, } + const from_addr = SocketAddr.fromEndpoint(&from_endpoint); + if (from_addr.isUnspecified() or from_addr.port() == 0) { + // unable to respond to these messages + continue; + } + try pull_requests.append(.{ .filter = pull[0], .value = value, @@ -559,7 +562,12 @@ pub const GossipService = struct { try prune_messages.append(&prune[1]); }, .PingMessage => |*ping| { - // TODO: filter out endpoints which are unspecificed / port = 0 + const from_addr = SocketAddr.fromEndpoint(&from_endpoint); + if (from_addr.isUnspecified() or from_addr.port() == 0) { + // unable to respond to these messages + continue; + } + try ping_messages.append(PingMessage{ .ping = ping, .from_endpoint = &from_endpoint, From abb77a98ef8f557b56cd624668089d27628f07b9 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 10 Oct 2023 13:20:50 -0400 Subject: [PATCH 32/72] fix pull request logic to better use ping_cache --- src/gossip/gossip_service.zig | 141 ++++++++++++++-------------------- src/gossip/ping_pong.zig | 8 +- 2 files changed, 63 insertions(+), 86 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 377591592..667ffb964 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -27,6 +27,7 @@ const Ping = @import("ping_pong.zig").Ping; const Pong = @import("ping_pong.zig").Pong; const bincode = @import("../bincode/bincode.zig"); const crds = @import("../gossip/crds.zig"); +const LegacyContactInfo = crds.LegacyContactInfo; const CrdsValue = crds.CrdsValue; const KeyPair = std.crypto.sign.Ed25519.KeyPair; @@ -99,7 +100,7 @@ pub const GossipService = struct { // note: this contact info should not change gossip_socket: UdpSocket, - my_contact_info: 
crds.LegacyContactInfo, + my_contact_info: LegacyContactInfo, my_keypair: KeyPair, my_pubkey: Pubkey, my_shred_version: u16, @@ -130,7 +131,7 @@ pub const GossipService = struct { pub fn init( allocator: std.mem.Allocator, - my_contact_info: crds.LegacyContactInfo, + my_contact_info: LegacyContactInfo, my_keypair: KeyPair, entrypoints: ?ArrayList(SocketAddr), exit: *AtomicBool, @@ -549,17 +550,13 @@ pub const GossipService = struct { var prune_data = &prune[1]; const now = getWallclockMs(); const prune_wallclock = prune_data.wallclock; + const too_old = prune_wallclock < now -| CRDS_GOSSIP_PRUNE_MSG_TIMEOUT_MS; - if (too_old) { - // return error.PruneMessageTooOld; - continue; - } - const bad_destination = !prune_data.destination.equals(&self.my_pubkey); - if (bad_destination) { - // return error.BadDestination; + const incorrect_destination = !prune_data.destination.equals(&self.my_pubkey); + if (too_old or incorrect_destination) { continue; } - try prune_messages.append(&prune[1]); + try prune_messages.append(prune_data); }, .PingMessage => |*ping| { const from_addr = SocketAddr.fromEndpoint(&from_endpoint); @@ -583,7 +580,6 @@ pub const GossipService = struct { } // handle batch messages - // PUSH if (push_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = push_messages.items.len; @@ -593,7 +589,6 @@ pub const GossipService = struct { push_messages.clearRetainingCapacity(); } - // PRUNE if (prune_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = prune_messages.items.len; @@ -603,7 +598,6 @@ pub const GossipService = struct { prune_messages.clearRetainingCapacity(); } - // PULL REQ if (pull_requests.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_requests.items.len; @@ -615,7 +609,6 @@ pub const GossipService = struct { pull_requests.clearRetainingCapacity(); } - // PULL RESP if (pull_responses.items.len > 0) { var x_timer = 
std.time.Timer.start() catch unreachable; const length = pull_responses.items.len; @@ -627,7 +620,6 @@ pub const GossipService = struct { pull_responses.clearRetainingCapacity(); } - // PING const n_ping_messages = ping_messages.items.len; if (n_ping_messages > 0) { var x_timer = std.time.Timer.start() catch unreachable; @@ -638,8 +630,6 @@ pub const GossipService = struct { ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); errdefer ping_packet_batch.deinit(); - // TODO: add back logging - for (ping_messages.items, 0..) |*ping_message, i| { const pong = try Pong.init(ping_message.ping, &self.my_keypair); const pong_message = Protocol{ .PongMessage = pong }; @@ -660,7 +650,6 @@ pub const GossipService = struct { ping_messages.clearRetainingCapacity(); } - // PONG if (pong_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); @@ -677,7 +666,6 @@ pub const GossipService = struct { now, ); } - self.logger.debugf("handle batch pong took {} with {} items @{}\n", .{ x_timer.read(), length, msg_count }); pong_messages.clearRetainingCapacity(); } @@ -784,7 +772,7 @@ pub const GossipService = struct { self: *Self, ) error{ OutOfMemory, SerializationError, ChannelClosed }!void { const now = getWallclockMs(); - var buf: [NUM_ACTIVE_SET_ENTRIES]crds.LegacyContactInfo = undefined; + var buf: [NUM_ACTIVE_SET_ENTRIES]LegacyContactInfo = undefined; var gossip_peers = self.getGossipNodes(&buf, NUM_ACTIVE_SET_ENTRIES, now); // filter out peers who have responded to pings @@ -796,8 +784,13 @@ pub const GossipService = struct { var result = try ping_cache.filterValidPeers(self.allocator, self.my_keypair, gossip_peers); break :blk result; }; - var valid_gossip_peers = ping_cache_result.valid_peers; - defer valid_gossip_peers.deinit(); + var valid_gossip_indexs = ping_cache_result.valid_peers; + defer valid_gossip_indexs.deinit(); + + var 
valid_gossip_peers: [NUM_ACTIVE_SET_ENTRIES]LegacyContactInfo = undefined; + for (valid_gossip_indexs.items) |i| { + valid_gossip_peers[i] = gossip_peers[i]; + } // send pings to peers var pings_to_send_out = ping_cache_result.pings; @@ -808,7 +801,7 @@ pub const GossipService = struct { var active_set_lock = self.active_set_rw.write(); defer active_set_lock.unlock(); var active_set: *ActiveSet = active_set_lock.mut(); - try active_set.rotate(valid_gossip_peers.items); + try active_set.rotate(valid_gossip_peers[0..valid_gossip_indexs.items.len]); } /// logic for building new push messages which are sent to peers from the @@ -928,7 +921,7 @@ pub const GossipService = struct { bloom_size: usize, ) !ArrayList(Packet) { // get nodes from crds table - var buf: [MAX_NUM_PULL_REQUESTS]crds.LegacyContactInfo = undefined; + var buf: [MAX_NUM_PULL_REQUESTS]LegacyContactInfo = undefined; const now = getWallclockMs(); var peers = self.getGossipNodes( &buf, @@ -969,8 +962,8 @@ pub const GossipService = struct { var result = try ping_cache.filterValidPeers(self.allocator, self.my_keypair, peers); break :blk result; }; - var valid_gossip_peers = ping_cache_result.valid_peers; - defer valid_gossip_peers.deinit(); + var valid_gossip_peer_indexs = ping_cache_result.valid_peers; + defer valid_gossip_peer_indexs.deinit(); // send pings to peers var pings_to_send_out = ping_cache_result.pings; @@ -978,7 +971,7 @@ pub const GossipService = struct { try self.sendPings(pings_to_send_out); const should_send_to_entrypoint = entrypoint_index != -1; - const num_peers = valid_gossip_peers.items.len; + const num_peers = valid_gossip_peer_indexs.items.len; if (num_peers == 0 and !should_send_to_entrypoint) { return error.NoPeers; @@ -1023,7 +1016,8 @@ pub const GossipService = struct { for (filters.items) |filter_i| { // TODO: incorperate stake weight in random sampling const peer_index = rng.random().intRangeAtMost(usize, 0, num_peers - 1); - const peer_contact_info = 
valid_gossip_peers.items[peer_index]; + const peer_contact_info_index = valid_gossip_peer_indexs.items[peer_index]; + const peer_contact_info = peers[peer_contact_info_index]; const peer_addr = peer_contact_info.gossip.toEndpoint(); const protocol_msg = Protocol{ .PullRequest = .{ filter_i, my_contact_info_value } }; @@ -1132,56 +1126,39 @@ pub const GossipService = struct { } } - const n_requests = pull_requests.items.len; - var valid_indexs = try ArrayList(usize).initCapacity(self.allocator, n_requests); - defer valid_indexs.deinit(); - - { + var valid_indexs = blk: { var ping_cache_lock = self.ping_cache_rw.write(); defer ping_cache_lock.unlock(); var ping_cache: *PingCache = ping_cache_lock.mut(); - // TODO: only allocate this once - var ping_packets = try ArrayList(Packet).initCapacity(self.allocator, n_requests); - var count: usize = 0; + var peers = try ArrayList(LegacyContactInfo).initCapacity(self.allocator, pull_requests.items.len); + defer peers.deinit(); + for (pull_requests.items) |req| { + peers.appendAssumeCapacity(req.value.data.LegacyContactInfo); + } - for (pull_requests.items, 0..) 
|req, i| { - // filter out valid peers and send ping messages to peers - var now_instant = std.time.Instant.now() catch @panic("time is not supported on this OS!"); - var puller_socket_addr = SocketAddr.fromEndpoint(&req.from_endpoint); - const caller = req.value.id(); + const result = try ping_cache.filterValidPeers(self.allocator, self.my_keypair, peers.items); + const ping_and_addrs = result.pings; + defer ping_and_addrs.deinit(); - var result = ping_cache.check( - now_instant, - .{ caller, puller_socket_addr }, - &self.my_keypair, - ); + const n_pings = ping_and_addrs.items.len; + if (n_pings > 0) { + var ping_packets = try ArrayList(Packet).initCapacity(self.allocator, n_pings); + ping_packets.appendNTimesAssumeCapacity(Packet.default(), n_pings); - // send a ping - if (result.maybe_ping) |ping| { - ping_packets.appendAssumeCapacity(Packet.default()); - var packet = &ping_packets.items[count]; + for (ping_and_addrs.items, ping_packets.items) |*ping_and_addr, *packet| { + const ping = ping_and_addr.ping; + const protocol_msg = Protocol{ .PingMessage = ping }; - var protocol_msg = Protocol{ .PingMessage = ping }; var serialized_ping = bincode.writeToSlice(&packet.data, protocol_msg, .{}) catch return error.SerializationError; - packet.addr = req.from_endpoint; + packet.addr = ping_and_addr.socket.toEndpoint(); packet.size = serialized_ping.len; - - count += 1; - } - - if (result.passes_ping_check) { - valid_indexs.appendAssumeCapacity(i); } - } - - // send the pings - if (count > 0) { try self.packet_outgoing_channel.send(ping_packets); - } else { - ping_packets.deinit(); } - } + break :blk result.valid_peers; + }; + defer valid_indexs.deinit(); if (valid_indexs.items.len == 0) { return; @@ -1605,12 +1582,12 @@ pub const GossipService = struct { pub fn getGossipNodes( self: *Self, /// the output slice which will be filled with gossip nodes - nodes: []crds.LegacyContactInfo, + nodes: []LegacyContactInfo, /// the maximum number of nodes to return ( max_size == 
nodes.len but comptime for init of stack array) comptime MAX_SIZE: usize, /// current time (used to filter out nodes that are too old) now: u64, - ) []crds.LegacyContactInfo { + ) []LegacyContactInfo { std.debug.assert(MAX_SIZE == nodes.len); // * 2 bc we might filter out some @@ -1790,7 +1767,7 @@ test "gossip.gossip_service: build messages startup and shutdown" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -1822,7 +1799,7 @@ test "gossip.gossip_service: tests handle_prune_messages" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -1841,7 +1818,7 @@ test "gossip.gossip_service: tests handle_prune_messages" { // add some peers var lg = gossip_service.crds_table_rw.write(); - var peers = ArrayList(crds.LegacyContactInfo).init(allocator); + var peers = ArrayList(LegacyContactInfo).init(allocator); defer peers.deinit(); for (0..10) |_| { var rand_keypair = try KeyPair.create(null); @@ -1894,7 +1871,7 @@ test "gossip.gossip_service: tests handle_pull_response" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -1955,7 
+1932,7 @@ test "gossip.gossip_service: tests handle_pull_request" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -2048,7 +2025,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -2075,7 +2052,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { } // insert contact info to send prunes to - var send_contact_info = crds.LegacyContactInfo.random(rng.random()); + var send_contact_info = LegacyContactInfo.random(rng.random()); send_contact_info.id = push_from; // valid socket addr var gossip_socket = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 20); @@ -2137,7 +2114,7 @@ test "gossip.gossip_service: test build_pull_requests" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -2181,7 +2158,7 @@ test "gossip.gossip_service: test build_push_messages" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = 
crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -2199,7 +2176,7 @@ test "gossip.gossip_service: test build_push_messages" { defer gossip_service.deinit(); // add some peers - var peers = ArrayList(crds.LegacyContactInfo).init(allocator); + var peers = ArrayList(LegacyContactInfo).init(allocator); defer peers.deinit(); var lg = gossip_service.crds_table_rw.write(); for (0..10) |_| { @@ -2253,7 +2230,7 @@ test "gossip.gossip_service: test packet verification" { var keypair = try KeyPair.create([_]u8{1} ** 32); var id = Pubkey.fromPublicKey(&keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(id); + var contact_info = LegacyContactInfo.default(id); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -2385,7 +2362,7 @@ test "gossip.gossip_service: process contact_info push packet" { var my_keypair = try KeyPair.create([_]u8{1} ** 32); var my_pubkey = Pubkey.fromPublicKey(&my_keypair.public_key, true); - var contact_info = crds.LegacyContactInfo.default(my_pubkey); + var contact_info = LegacyContactInfo.default(my_pubkey); contact_info.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var logger = Logger.init(std.testing.allocator, .debug); @@ -2418,7 +2395,7 @@ test "gossip.gossip_service: process contact_info push packet" { var id = pk; // new contact info - var legacy_contact_info = crds.LegacyContactInfo.default(id); + var legacy_contact_info = LegacyContactInfo.default(id); var crds_data = crds.CrdsData{ .LegacyContactInfo = legacy_contact_info, }; @@ -2474,7 +2451,7 @@ test "gossip.gossip_service: init, exit, and deinit" { var gossip_address = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 0); var my_keypair = try KeyPair.create(null); var rng = std.rand.DefaultPrng.init(getWallclockMs()); - var 
contact_info = crds.LegacyContactInfo.random(rng.random()); + var contact_info = LegacyContactInfo.random(rng.random()); contact_info.gossip = gossip_address; var exit = AtomicBool.init(false); var logger = Logger.init(std.testing.allocator, .debug); @@ -2527,7 +2504,7 @@ pub const BenchmarkGossipServiceGeneral = struct { var endpoint = address.toEndpoint(); var pubkey = Pubkey.fromPublicKey(&keypair.public_key, false); - var contact_info = crds.LegacyContactInfo.default(pubkey); + var contact_info = LegacyContactInfo.default(pubkey); contact_info.shred_version = 19; contact_info.gossip = address; diff --git a/src/gossip/ping_pong.zig b/src/gossip/ping_pong.zig index 9c7c7784b..9d2345f36 100644 --- a/src/gossip/ping_pong.zig +++ b/src/gossip/ping_pong.zig @@ -217,16 +217,16 @@ pub const PingCache = struct { allocator: std.mem.Allocator, our_keypair: KeyPair, peers: []LegacyContactInfo, - ) error{OutOfMemory}!struct { valid_peers: std.ArrayList(LegacyContactInfo), pings: std.ArrayList(PingAndSocketAddr) } { + ) error{OutOfMemory}!struct { valid_peers: std.ArrayList(usize), pings: std.ArrayList(PingAndSocketAddr) } { var now = std.time.Instant.now() catch @panic("time not supported by OS!"); - var valid_peers = std.ArrayList(LegacyContactInfo).init(allocator); + var valid_peers = std.ArrayList(usize).init(allocator); var pings = std.ArrayList(PingAndSocketAddr).init(allocator); - for (peers) |peer| { + for (peers, 0..) 
|peer, i| { if (!peer.gossip.isUnspecified()) { var result = self.check(now, PubkeyAndSocketAddr{ peer.id, peer.gossip }, &our_keypair); if (result.passes_ping_check) { - try valid_peers.append(peer); + try valid_peers.append(i); } if (result.maybe_ping) |ping| { try pings.append(.{ .ping = ping, .socket = peer.gossip }); From 9356b3e62eb350a3cf85c03e56addfa338e90c93 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 10 Oct 2023 13:43:36 -0400 Subject: [PATCH 33/72] fix pull req ping logic further --- src/gossip/gossip_service.zig | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 667ffb964..f3bf1209d 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -706,17 +706,15 @@ pub const GossipService = struct { while (!self.exit.load(std.atomic.Ordering.Unordered)) { const top_of_loop_ts = getWallclockMs(); - // TODO: send ping messages based on PingCache - - // new pull msgs if (should_send_pull_requests) pull_blk: { - var pull_packets = self.buildPullRequests( + // this also includes sending ping messages to other peers + var packets = self.buildPullRequests( pull_request.MAX_BLOOM_SIZE, ) catch |e| { self.logger.debugf("failed to generate pull requests: {any}", .{e}); break :pull_blk; }; - try self.packet_outgoing_channel.send(pull_packets); + try self.packet_outgoing_channel.send(packets); } // every other loop should_send_pull_requests = !should_send_pull_requests; @@ -1138,24 +1136,9 @@ pub const GossipService = struct { } const result = try ping_cache.filterValidPeers(self.allocator, self.my_keypair, peers.items); - const ping_and_addrs = result.pings; - defer ping_and_addrs.deinit(); - - const n_pings = ping_and_addrs.items.len; - if (n_pings > 0) { - var ping_packets = try ArrayList(Packet).initCapacity(self.allocator, n_pings); - ping_packets.appendNTimesAssumeCapacity(Packet.default(), n_pings); - - 
for (ping_and_addrs.items, ping_packets.items) |*ping_and_addr, *packet| { - const ping = ping_and_addr.ping; - const protocol_msg = Protocol{ .PingMessage = ping }; + defer result.pings.deinit(); + try self.sendPings(result.pings); - var serialized_ping = bincode.writeToSlice(&packet.data, protocol_msg, .{}) catch return error.SerializationError; - packet.addr = ping_and_addr.socket.toEndpoint(); - packet.size = serialized_ping.len; - } - try self.packet_outgoing_channel.send(ping_packets); - } break :blk result.valid_peers; }; defer valid_indexs.deinit(); @@ -1560,6 +1543,8 @@ pub const GossipService = struct { pings: ArrayList(PingAndSocketAddr), ) error{ OutOfMemory, ChannelClosed, SerializationError }!void { const n_pings = pings.items.len; + if (n_pings == 0) return; + var packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_pings); errdefer packet_batch.deinit(); packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_pings); From a55429607c3d04ee93f1c5d7353de6df7ec93313 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 10 Oct 2023 14:04:27 -0400 Subject: [PATCH 34/72] fix mem leak on push messages --- src/gossip/gossip_service.zig | 35 ++++++++++++++++++++++++++++++----- src/sync/channel.zig | 32 ++++++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index f3bf1209d..6c4b14a1c 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -726,9 +726,8 @@ pub const GossipService = struct { break :blk null; }; if (maybe_push_packets) |push_packets| { - for (push_packets.items) |packet_batch| { - try self.packet_outgoing_channel.send(packet_batch); - } + try self.packet_outgoing_channel.sendBatch(push_packets); + push_packets.deinit(); } // trim data @@ -836,10 +835,13 @@ pub const GossipService = struct { } var num_values_considered: usize = 0; - var active_set_lock = self.active_set_rw.read(); - var 
active_set: *const ActiveSet = active_set_lock.get(); { + var active_set_lock = self.active_set_rw.read(); + var active_set: *const ActiveSet = active_set_lock.get(); defer active_set_lock.unlock(); + + if (active_set.len == 0) return null; + for (crds_entries) |entry| { const value = entry.value; @@ -1770,6 +1772,29 @@ test "gossip.gossip_service: build messages startup and shutdown" { defer gossip_service.deinit(); var build_messages_handle = try Thread.spawn(.{}, GossipService.buildMessages, .{&gossip_service}); + + // add some crds values to push + var rng = std.rand.DefaultPrng.init(91); + var lg = gossip_service.crds_table_rw.write(); + var ping_lock = gossip_service.ping_cache_rw.write(); + var ping_cache: *PingCache = ping_lock.mut(); + + var peers = ArrayList(LegacyContactInfo).init(allocator); + defer peers.deinit(); + + for (0..10) |_| { + var rand_keypair = try KeyPair.create(null); + var value = try CrdsValue.randomWithIndex(rng.random(), &rand_keypair, 0); // contact info + // make gossip valid + value.data.LegacyContactInfo.gossip = SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 8000); + try lg.mut().insert(value, getWallclockMs()); + try peers.append(value.data.LegacyContactInfo); + // set the pong status as OK so they included in active set + ping_cache._setPong(value.data.LegacyContactInfo.id, value.data.LegacyContactInfo.gossip); + } + lg.unlock(); + ping_lock.unlock(); + std.time.sleep(std.time.ns_per_s * 3); exit.store(true, std.atomic.Ordering.Unordered); diff --git a/src/sync/channel.zig b/src/sync/channel.zig index 4d8d1b1c8..ed195e1a8 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -11,7 +11,7 @@ const Ordering = std.atomic.Ordering; pub fn Channel(comptime T: type) type { return struct { buffer: Mux(std.ArrayList(T)), - hasValue: Condition = .{}, + has_value: Condition = .{}, closed: Atomic(bool) = Atomic(bool).init(false), allocator: std.mem.Allocator, @@ -39,10 +39,26 @@ pub fn Channel(comptime T: type) type { if 
(self.closed.load(.Monotonic)) { return error.ChannelClosed; } - var buffer = self.buffer.lock(); - defer buffer.unlock(); - try buffer.mut().append(value); - self.hasValue.signal(); + var buffer_lock = self.buffer.lock(); + defer buffer_lock.unlock(); + + var buffer: *std.ArrayList(T) = buffer_lock.mut(); + try buffer.append(value); + + self.has_value.signal(); + } + + pub fn sendBatch(self: *Self, value: std.ArrayList(T)) error{ OutOfMemory, ChannelClosed }!void { + if (self.closed.load(.Monotonic)) { + return error.ChannelClosed; + } + var buffer_lock = self.buffer.lock(); + defer buffer_lock.unlock(); + + var buffer: *std.ArrayList(T) = buffer_lock.mut(); + try buffer.appendSlice(value.items); + + self.has_value.signal(); } pub fn receive(self: *Self) ?T { @@ -50,7 +66,7 @@ pub fn Channel(comptime T: type) type { defer buffer.unlock(); while (buffer.get().items.len == 0 and !self.closed.load(.SeqCst)) { - buffer.condition(&self.hasValue); + buffer.condition(&self.has_value); } // channel closed so return null to signal no more items @@ -69,7 +85,7 @@ pub fn Channel(comptime T: type) type { defer buffer.unlock(); while (buffer.get().items.len == 0 and !self.closed.load(.SeqCst)) { - buffer.condition(&self.hasValue); + buffer.condition(&self.has_value); } // channel closed so return null to signal no more items @@ -111,7 +127,7 @@ pub fn Channel(comptime T: type) type { pub fn close(self: *Self) void { self.closed.store(true, .SeqCst); - self.hasValue.broadcast(); + self.has_value.broadcast(); } pub fn isClosed(self: *Self) bool { From 9be8e497eeafcfdb3f67f0e8c3cd457600602605 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 10 Oct 2023 14:31:40 -0400 Subject: [PATCH 35/72] fix --- src/gossip/gossip_service.zig | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 6c4b14a1c..3c7eaa157 100644 --- a/src/gossip/gossip_service.zig +++ 
b/src/gossip/gossip_service.zig @@ -624,8 +624,7 @@ pub const GossipService = struct { if (n_ping_messages > 0) { var x_timer = std.time.Timer.start() catch unreachable; - // init a new batch of responses - // TODO: figure out a way to re-use this allocation instead of freeing after responder sends it + // init a new batch of pong responses var ping_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); errdefer ping_packet_batch.deinit(); @@ -751,7 +750,6 @@ pub const GossipService = struct { } self.rotateActiveSet() catch @panic("out of memory"); - last_push_ts = getWallclockMs(); } @@ -1167,8 +1165,6 @@ pub const GossipService = struct { var output_limit = std.atomic.Atomic(i64).init(MAX_NUM_CRDS_VALUES_PULL_RESPONSE); for (valid_indexs.items) |i| { - // TODO: pre-allocate these tasks - // create the thread task var task_heap = try self.allocator.create(PullRequestTask); task_heap.* = PullRequestTask{ @@ -1456,7 +1452,6 @@ pub const GossipService = struct { var n_packets = pubkey_to_failed_origins_iter.len; if (n_packets == 0) return; - // TODO: figure out a way to re-use this allocation var prune_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_packets); prune_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_packets); var count: usize = 0; From 0f5a6717466c63c4664ea0fcc60f4f281e230112 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Tue, 10 Oct 2023 14:36:36 -0400 Subject: [PATCH 36/72] small fix --- src/gossip/ping_pong.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gossip/ping_pong.zig b/src/gossip/ping_pong.zig index 9d2345f36..9caf87bcc 100644 --- a/src/gossip/ping_pong.zig +++ b/src/gossip/ping_pong.zig @@ -222,7 +222,7 @@ pub const PingCache = struct { var valid_peers = std.ArrayList(usize).init(allocator); var pings = std.ArrayList(PingAndSocketAddr).init(allocator); - for 
(peers, 0..) |peer, i| { + for (peers, 0..) |*peer, i| { if (!peer.gossip.isUnspecified()) { var result = self.check(now, PubkeyAndSocketAddr{ peer.id, peer.gossip }, &our_keypair); if (result.passes_ping_check) { From 8b3fee9d55189b37c19c2d9d4b583724a3bdd933 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 13 Oct 2023 09:05:39 -0400 Subject: [PATCH 37/72] fix --- src/common/lru.zig | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index b86700abb..01bdc867f 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -4,8 +4,6 @@ const TailQueue = std.TailQueue; const testing = std.testing; const assert = std.debug.assert; -/// A thread-safe LRU Cache -/// // TODO: allow for passing custom hash context to use in std.ArrayHashMap for performance. pub fn LruCache(comptime K: type, comptime V: type) type { return struct { From c623221a1b86757eab65312aa3e36f7c964aa812 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 13 Oct 2023 09:34:35 -0400 Subject: [PATCH 38/72] clean up --- src/cmd/cmd.zig | 4 +- src/gossip/fuzz.zig | 2 +- src/gossip/gossip_service.zig | 192 ++++++++++++++++++---------------- 3 files changed, 103 insertions(+), 95 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index e5505f18a..ff73d8674 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -114,8 +114,8 @@ fn gossip(_: []const []const u8) !void { var handle = try std.Thread.spawn( .{}, - GossipService.runSpy, - .{&gossip_service}, + GossipService.run, + .{ &gossip_service, true }, ); handle.join(); diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 52e88486b..9b134a1a3 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -292,7 +292,7 @@ pub fn main() !void { .noop, ); - var fuzz_handle = try std.Thread.spawn(.{}, GossipService.runSpy, .{&gossip_service_fuzzer}); + var fuzz_handle = try std.Thread.spawn(.{}, GossipService.run, .{ &gossip_service_fuzzer, true }); // 
std.debug.print("setting up", .{}); // while (true) { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 3c7eaa157..1059f134a 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -258,10 +258,10 @@ pub const GossipService = struct { /// 1) socket reciever /// 2) packet verifier /// 3) packet processor - /// 4) build message loop (to send outgoing message) + /// 4) build message loop (to send outgoing message) (only active if not a spy node) /// 5) a socket responder (to send outgoing packets) /// 6) echo server - pub fn run(self: *Self) !void { + pub fn run(self: *Self, spy_node: bool) !void { var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); defer self.joinAndExit(&ip_echo_server_listener_handle); @@ -280,39 +280,13 @@ pub const GossipService = struct { var packet_handle = try Thread.spawn(.{}, Self.processMessages, .{self}); defer self.joinAndExit(&packet_handle); - var build_messages_handle = try Thread.spawn(.{}, Self.buildMessages, .{self}); - defer self.joinAndExit(&build_messages_handle); - - // outputer thread - var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ - &self.gossip_socket, - self.packet_outgoing_channel, - self.exit, - self.logger, - }); - defer self.joinAndExit(&responder_handle); - } - - pub fn runSpy(self: *Self) !void { - var ip_echo_server_listener_handle = try Thread.spawn(.{}, echo.Server.listenAndServe, .{&self.echo_server}); - defer self.joinAndExit(&ip_echo_server_listener_handle); - - var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ - self.allocator, - &self.gossip_socket, - self.packet_incoming_channel, - self.exit, - self.logger, - }); - defer self.joinAndExit(&receiver_handle); - - var packet_verifier_handle = try Thread.spawn(.{}, Self.verifyPackets, .{self}); - defer self.joinAndExit(&packet_verifier_handle); - - var packet_handle = try Thread.spawn(.{}, 
Self.processMessages, .{self}); - defer self.joinAndExit(&packet_handle); + var maybe_build_messages_handle = if (!spy_node) try Thread.spawn(.{}, Self.buildMessages, .{self}) else null; + defer { + if (maybe_build_messages_handle) |*handle| { + self.joinAndExit(handle); + } + } - // outputer thread var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ &self.gossip_socket, self.packet_outgoing_channel, @@ -326,6 +300,7 @@ pub const GossipService = struct { packet: *const Packet, allocator: std.mem.Allocator, verified_incoming_channel: *Channel(ProtocolMessage), + logger: Logger, task: Task, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(false), @@ -340,15 +315,18 @@ pub const GossipService = struct { self.packet.data[0..self.packet.size], bincode.Params.standard, ) catch { + self.logger.debugf("gossip: packet_verify: failed to deserialize\n", .{}); return; }; protocol_message.sanitize() catch { + self.logger.debugf("gossip: packet_verify: failed to sanitize\n", .{}); bincode.free(self.allocator, protocol_message); return; }; protocol_message.verifySignature() catch { + self.logger.debugf("gossip: packet_verify: failed to verify signature\n", .{}); bincode.free(self.allocator, protocol_message); return; }; @@ -374,6 +352,7 @@ pub const GossipService = struct { .allocator = self.allocator, .verified_incoming_channel = self.verified_incoming_channel, .packet = &Packet.default(), + .logger = self.logger, }; tasks[i] = verify_task_heap; } @@ -481,7 +460,6 @@ pub const GossipService = struct { const maybe_protocol_messages = try self.verified_incoming_channel.try_drain(); if (maybe_protocol_messages == null) { - // std.time.sleep(std.time.ns_per_ms * 1); continue; } @@ -583,7 +561,9 @@ pub const GossipService = struct { if (push_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = push_messages.items.len; - try self.handleBatchPushMessages(&push_messages, self.logger); + 
self.handleBatchPushMessages(&push_messages) catch |err| { + std.debug.print("handleBatchPushMessages failed: {}\n", .{err}); + }; const elapsed = x_timer.read(); self.logger.debugf("handle batch push took {} with {} items @{}\n", .{ elapsed, length, msg_count }); push_messages.clearRetainingCapacity(); @@ -592,7 +572,7 @@ pub const GossipService = struct { if (prune_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = prune_messages.items.len; - try self.handleBatchPruneMessages(&prune_messages); + self.handleBatchPruneMessages(&prune_messages); const elapsed = x_timer.read(); self.logger.debugf("handle batch prune took {} with {} items @{}\n", .{ elapsed, length, msg_count }); prune_messages.clearRetainingCapacity(); @@ -612,7 +592,7 @@ pub const GossipService = struct { if (pull_responses.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_responses.items.len; - self.handleBatchPullResponses(&pull_responses, self.logger) catch |err| { + self.handleBatchPullResponses(&pull_responses) catch |err| { std.debug.print("handleBatchPullResponses failed: {}\n", .{err}); }; const elapsed = x_timer.read(); @@ -620,52 +600,21 @@ pub const GossipService = struct { pull_responses.clearRetainingCapacity(); } - const n_ping_messages = ping_messages.items.len; - if (n_ping_messages > 0) { + if (ping_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; - - // init a new batch of pong responses - var ping_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); - ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); - errdefer ping_packet_batch.deinit(); - - for (ping_messages.items, 0..) 
|*ping_message, i| { - const pong = try Pong.init(ping_message.ping, &self.my_keypair); - const pong_message = Protocol{ .PongMessage = pong }; - - var packet = &ping_packet_batch.items[i]; - const bytes_written = try bincode.writeToSlice( - &packet.data, - pong_message, - bincode.Params.standard, - ); - - packet.size = bytes_written.len; - packet.addr = ping_message.from_endpoint.*; - } - try self.packet_outgoing_channel.send(ping_packet_batch); - + const n_ping_messages = ping_messages.items.len; + self.handleBatchPingMessages(&ping_messages) catch |err| { + std.debug.print("handleBatchPingMessages failed: {}\n", .{err}); + }; self.logger.debugf("handle batch ping took {} with {} items @{}\n", .{ x_timer.read(), n_ping_messages, msg_count }); ping_messages.clearRetainingCapacity(); } if (pong_messages.items.len > 0) { var x_timer = std.time.Timer.start() catch unreachable; - const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); - const length = pong_messages.items.len; - - var ping_cache_lock = self.ping_cache_rw.write(); - defer ping_cache_lock.unlock(); - var ping_cache: *PingCache = ping_cache_lock.mut(); - - for (pong_messages.items) |*pong_message| { - _ = ping_cache.receviedPong( - pong_message.pong, - SocketAddr.fromEndpoint(pong_message.from_endpoint), - now, - ); - } - self.logger.debugf("handle batch pong took {} with {} items @{}\n", .{ x_timer.read(), length, msg_count }); + const n_pong_messages = pong_messages.items.len; + self.handleBatchPongMessages(&pong_messages); + self.logger.debugf("handle batch pong took {} with {} items @{}\n", .{ x_timer.read(), n_pong_messages, msg_count }); pong_messages.clearRetainingCapacity(); } @@ -1201,6 +1150,60 @@ pub const GossipService = struct { } } + pub fn handleBatchPongMessages( + self: *Self, + pong_messages: *const ArrayList(PongMessage), + ) void { + const now = std.time.Instant.now() catch @panic("time is not supported on the OS!"); + + var ping_cache_lock = 
self.ping_cache_rw.write(); + defer ping_cache_lock.unlock(); + var ping_cache: *PingCache = ping_cache_lock.mut(); + + for (pong_messages.items) |*pong_message| { + _ = ping_cache.receviedPong( + pong_message.pong, + SocketAddr.fromEndpoint(pong_message.from_endpoint), + now, + ); + } + } + + pub fn handleBatchPingMessages( + self: *Self, + ping_messages: *const ArrayList(PingMessage), + ) !void { + const n_ping_messages = ping_messages.items.len; + + // init a new batch of pong responses + var ping_packet_batch = try ArrayList(Packet).initCapacity(self.allocator, n_ping_messages); + ping_packet_batch.appendNTimesAssumeCapacity(Packet.default(), n_ping_messages); + errdefer ping_packet_batch.deinit(); + + for (ping_messages.items, 0..) |*ping_message, i| { + const pong = try Pong.init(ping_message.ping, &self.my_keypair); + const pong_message = Protocol{ .PongMessage = pong }; + + var packet = &ping_packet_batch.items[i]; + const bytes_written = try bincode.writeToSlice( + &packet.data, + pong_message, + bincode.Params.standard, + ); + + packet.size = bytes_written.len; + packet.addr = ping_message.from_endpoint.*; + + const endpoint_str = try endpointToString(self.allocator, ping_message.from_endpoint); + defer endpoint_str.deinit(); + self.logger + .field("from_endpoint", endpoint_str.items) + .field("from_pubkey", &ping_message.ping.from.string()) + .info("gossip: recv ping"); + } + try self.packet_outgoing_channel.send(ping_packet_batch); + } + /// logic for handling a pull response message. /// successful inserted values, have their origin value timestamps updated. 
/// failed inserts (ie, too old or duplicate values) are added to the failed pull hashes so that they can be @@ -1208,12 +1211,10 @@ pub const GossipService = struct { pub fn handleBatchPullResponses( self: *Self, pull_response_messages: *const ArrayList(PullResponseMessage), - logger: Logger, ) !void { if (pull_response_messages.items.len == 0) { return; } - _ = logger; const now = getWallclockMs(); var failed_insert_ptrs = ArrayList(*CrdsValue).init(self.allocator); @@ -1289,7 +1290,7 @@ pub const GossipService = struct { pub fn handleBatchPruneMessages( self: *Self, prune_messages: *const ArrayList(*PruneData), - ) !void { + ) void { var active_set_lock = self.active_set_rw.write(); defer active_set_lock.unlock(); var active_set: *ActiveSet = active_set_lock.mut(); @@ -1365,12 +1366,10 @@ pub const GossipService = struct { pub fn handleBatchPushMessages( self: *Self, batch_push_messages: *const ArrayList(PushMessage), - logger: Logger, ) !void { if (batch_push_messages.items.len == 0) { return; } - _ = logger; var pubkey_to_failed_origins = std.AutoArrayHashMap( Pubkey, @@ -1410,6 +1409,12 @@ pub const GossipService = struct { const failed_insert_indexs = result.failed.?; defer failed_insert_indexs.deinit(); + self.logger + .field("n_values", valid_len) + .field("from_addr", &push_message.from_pubkey.string()) + .field("n_failed_inserts", failed_insert_indexs.items.len) + .info("gossip: recv push_message"); + if (failed_insert_indexs.items.len == 0) { // dont need to build prune messages continue; @@ -1474,6 +1479,11 @@ pub const GossipService = struct { prune_data.sign(&self.my_keypair) catch return error.SignatureError; var protocol = Protocol{ .PruneMessage = .{ self.my_pubkey, prune_data } }; + self.logger + .field("n_pruned_origins", prune_size) + .field("to_addr", &from_pubkey.string()) + .info("gossip: send prune_message"); + var packet = &prune_packet_batch.items[count]; var written_slice = bincode.writeToSlice(&packet.data, protocol, bincode.Params{}) 
catch unreachable; packet.size = written_slice.len; @@ -1860,7 +1870,7 @@ test "gossip.gossip_service: tests handle_prune_messages" { defer data.deinit(); try data.append(&prune_data); - try gossip_service.handleBatchPruneMessages(&data); + gossip_service.handleBatchPruneMessages(&data); var as_lock2 = gossip_service.active_set_rw.read(); var as2: *const ActiveSet = as_lock2.get(); @@ -1910,7 +1920,7 @@ test "gossip.gossip_service: tests handle_pull_response" { .from_pubkey = &my_pubkey, }); - try gossip_service.handleBatchPullResponses(&data, logger); + try gossip_service.handleBatchPullResponses(&data); // make sure values are inserted var crds_table_lock = gossip_service.crds_table_rw.read(); @@ -1921,7 +1931,7 @@ test "gossip.gossip_service: tests handle_pull_response" { crds_table_lock.unlock(); // try inserting again with same values (should all fail) - try gossip_service.handleBatchPullResponses(&data, logger); + try gossip_service.handleBatchPullResponses(&data); var lg = gossip_service.failed_pull_hashes_mux.lock(); var failed_pull_hashes: *HashTimeQueue = lg.mut(); @@ -2080,7 +2090,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { .from_pubkey = &push_from, }); - try gossip_service.handleBatchPushMessages(&msgs, logger); + try gossip_service.handleBatchPushMessages(&msgs); { var packet_lg = gossip_service.packet_outgoing_channel.buffer.lock(); defer packet_lg.unlock(); @@ -2089,7 +2099,7 @@ test "gossip.gossip_service: test build prune messages and handle_push_msgs" { try std.testing.expect(outgoing_packets.items.len == 0); } - try gossip_service.handleBatchPushMessages(&msgs, logger); + try gossip_service.handleBatchPushMessages(&msgs); var packet = blk: { var packet_lg = gossip_service.packet_outgoing_channel.buffer.lock(); defer packet_lg.unlock(); @@ -2474,8 +2484,8 @@ test "gossip.gossip_service: init, exit, and deinit" { var handle = try std.Thread.spawn( .{}, - GossipService.runSpy, - .{&gossip_service}, + 
GossipService.run, + .{ &gossip_service, true }, ); gossip_service.echo_server.kill(); @@ -2531,9 +2541,7 @@ pub const BenchmarkGossipServiceGeneral = struct { gossip_service.echo_server.kill(); // we dont need this rn defer gossip_service.deinit(); - var packet_handle = try Thread.spawn(.{}, GossipService.runSpy, .{ - &gossip_service, - }); + var packet_handle = try Thread.spawn(.{}, GossipService.run, .{ &gossip_service, true }); // send incomign packets/messages var outgoing_channel = Channel(ArrayList(Packet)).init(allocator, 10_000); From f2d218118df0ed268eb1884eedc4cac948eefa22 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 13 Oct 2023 09:37:58 -0400 Subject: [PATCH 39/72] fix --- src/gossip/gossip_service.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 1059f134a..6d6ba205e 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -562,7 +562,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; const length = push_messages.items.len; self.handleBatchPushMessages(&push_messages) catch |err| { - std.debug.print("handleBatchPushMessages failed: {}\n", .{err}); + self.logger.debugf("handleBatchPushMessages failed: {}\n", .{err}); }; const elapsed = x_timer.read(); self.logger.debugf("handle batch push took {} with {} items @{}\n", .{ elapsed, length, msg_count }); @@ -582,7 +582,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_requests.items.len; self.handleBatchPullRequest(pull_requests) catch |err| { - std.debug.print("handleBatchPullRequest failed: {}\n", .{err}); + self.logger.debugf("handleBatchPullRequest failed: {}\n", .{err}); }; const elapsed = x_timer.read(); self.logger.debugf("handle batch pull_req took {} with {} items @{}\n", .{ elapsed, length, msg_count }); @@ -593,7 +593,7 @@ pub const 
GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; const length = pull_responses.items.len; self.handleBatchPullResponses(&pull_responses) catch |err| { - std.debug.print("handleBatchPullResponses failed: {}\n", .{err}); + self.logger.debugf("handleBatchPullResponses failed: {}\n", .{err}); }; const elapsed = x_timer.read(); self.logger.debugf("handle batch pull_resp took {} with {} items @{}\n", .{ elapsed, length, msg_count }); @@ -604,7 +604,7 @@ pub const GossipService = struct { var x_timer = std.time.Timer.start() catch unreachable; const n_ping_messages = ping_messages.items.len; self.handleBatchPingMessages(&ping_messages) catch |err| { - std.debug.print("handleBatchPingMessages failed: {}\n", .{err}); + self.logger.debugf("handleBatchPingMessages failed: {}\n", .{err}); }; self.logger.debugf("handle batch ping took {} with {} items @{}\n", .{ x_timer.read(), n_ping_messages, msg_count }); ping_messages.clearRetainingCapacity(); From 59e6bfbad1eef2eec35fbadf369fc16b3953f171 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 4 Dec 2023 09:56:26 -0500 Subject: [PATCH 40/72] docs: flesh out readme and fix version number in zon file --- build.zig.zon | 2 +- docs/CONTRIBUTING.md | 4 +- readme.md | 190 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 187 insertions(+), 9 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 6faba9e9f..c97036005 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,6 +1,6 @@ .{ .name = "sig", - .version = "0.1.0", + .version = "0.1.1", .dependencies = .{ .@"base58-zig" = .{ .url = "https://github.com/ultd/base58-zig/archive/fe23d156211edbfcede019970d1619184aadb83b.tar.gz", diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 4f1a78a91..e19a98cf4 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -12,8 +12,8 @@ Before you start contributing, please take a moment to read and understand this ```zig fn do_something(maybe_foo: ?Foo) void { - if (maybe_foo) |foo| 
{ - // do something with foo here + if (maybe_foo) |foo| { + // do something with foo here } } ``` diff --git a/readme.md b/readme.md index 24c433dae..63309004c 100644 --- a/readme.md +++ b/readme.md @@ -1,24 +1,202 @@

-

 🤖⚡  Sig - a Solana Zig validator client

-
+

 🤖⚡  Sig - a Solana Zig validator client


+
Version Build status - Zig - License + Zig + License +


-## Overview - _Sig_ is a Solana validator client implementation written in Zig.

⚠️ NOTE: This is a WIP, please open any issues for any bugs/improvements. +## 📦 Setup + +Zig 0.11.0 is required to build Sig. + +
Details + +### Build Dependencies +- Zig 0.11.0 - Choose one: + - [Binary Releases](https://ziglang.org/download/) (extract and add to PATH) + - [Install with a package manager](https://github.com/ziglang/zig/wiki/Install-Zig-from-a-Package-Manager) + - Manage multiple versions with [zigup](https://github.com/marler8997/zigup) or [zvm](https://www.zvm.app/) + +### Developer Tools +These tools are optional but recommended for a smooth development process. + +- [Zig Language Server (ZLS) 0.11.0](https://github.com/zigtools/zls/wiki/Installation) +- [lldb](https://lldb.llvm.org/): [Zig CLI Debugging](https://devlog.hexops.com/2022/debugging-undefined-behavior/) +- [Zig Language](https://marketplace.visualstudio.com/items?itemName=ziglang.vscode-zig) VS Code extension +- [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb) VS Code extension + +#### Visual Studio Code + +If you use VS Code, you should install the [Zig Language](https://marketplace.visualstudio.com/items?itemName=ziglang.vscode-zig) extension. It can use your installed versions of Zig and ZLS, or it can download and manage its own internal versions. + +You can use [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb) to debug Zig code with lldb in VS Code's debugging GUI. If you'd like to automatically build the project before running the debugger, you'll need a `zig build` task. + +
tasks.json + +```json +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "zig build", + "type": "shell", + "command": "zig", + "args": [ + "build", + "--summary", + "all" + ], + "options": { + "cwd": "${workspaceRoot}" + }, + "presentation": { + "echo": true, + "reveal": "always", + "focus": false, + "panel": "shared", + "showReuseMessage": true, + "clear": false + }, + "problemMatcher": [], + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} +``` +

+ +To run the debugger, you need a run configuration. This launch.json includes an example for debugging gossip. Customize the args as desired. + +
launch.json + +```json +{ + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug Gossip Mainnet", + "program": "${workspaceFolder}/zig-out/bin/sig", + "args": ["gossip", "--entrypoint", "34.83.231.102:8001", "--entrypoint", "145.40.67.83:8001", "--entrypoint", "147.75.38.117:8001", "--entrypoint", "145.40.93.177:8001", "--entrypoint", "86.109.15.59:8001"], + "cwd": "${workspaceFolder}", + "preLaunchTask": "zig build" + }, + ] +} +``` +

+ +
+ +## 🔧 Build + +```bash +zig build +``` + +## ▶️ Usage + +### 🚀 Run Sig + +Run Sig with `zig` or execute the binary you already built: +```bash +zig build run -- --help +``` +```bash +./zig-out/bin/sig --help +``` + +For simplicity, the above commands will be abbreviated as `sig` in the rest of this document. An alias can be used to realize this abbreviation in your shell. +```bash +alias sig="$(pwd)/zig-out/bin/sig" +``` + +### 👤 Identity + +Sig stores its private key in `~/.sig/identity.key`. On its first run, Sig will automatically generate a key if no key exists. + +To see the public key, use the `identity` subcommand: +```bash +sig identity +``` + +### 📞 Gossip + +To run Sig as a Solana gossip client, use the `gossip` subcommand. Specify entrypoints to connect to a cluster. Optionally use `-p` to specify a custom listening port (default is 8001). +```bash +sig gossip -p --entrypoint : +``` + +
mainnet + +```bash +sig gossip --entrypoint 34.83.231.102:8001 \ + --entrypoint 145.40.67.83:8001 \ + --entrypoint 147.75.38.117:8001 \ + --entrypoint 145.40.93.177:8001 \ + --entrypoint 86.109.15.59:8001 +``` +
+ +
devnet + +```bash +sig gossip --entrypoint 35.197.53.105:8001 \ + --entrypoint 147.75.55.147:8001 \ + --entrypoint 136.144.49.15:8001 \ + --entrypoint 145.40.71.85:8001 \ + --entrypoint 147.75.105.51:8001 +``` +
+ +
testnet + +```bash +sig gossip --entrypoint 35.203.170.30:8001 \ + --entrypoint 139.178.68.207:8001 \ + --entrypoint 139.178.68.207:8001 +``` +
+ + +### 🧪 Testing +```bash +zig build test --summary all +``` + +## Learn More +[Zig](https://ziglang.org/) +- [Documentation](https://ziglang.org/documentation/0.11.0/) +- [Ziglearn Book](https://ziglearn.org/) +- [Ziglings Exercises](https://github.com/ratfactor/ziglings) + +[Solana](https://solana.com/) +- [Documentation](https://docs.solana.com/validator/anatomy) +- [Code](https://github.com/solana-labs/solana) + +Sig +- [Introduction](https://blog.syndica.io/introducing-sig-by-syndica-an-rps-focused-solana-validator-client-written-in-zig/) +- [Gossip Deep Dive](https://blog.syndica.io/sig-engineering-1-gossip-protocol/) + ## Why Zig? Zig's own definition: `Zig is a general-purpose programming language and toolchain for maintaining robust, optimal and reusable software.` From c286d2c6462e54760bdeea6982739946fa786c4f Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 4 Dec 2023 15:34:47 -0500 Subject: [PATCH 41/72] docs: organization, restructure readme, add more test/dev info, and move some content into other files --- docs/CONTRIBUTING.md | 4 + docs/api.md | 2434 +++++++++++++++++++++++++++++++++++++++ readme.md | 2565 ++---------------------------------------- 3 files changed, 2514 insertions(+), 2489 deletions(-) create mode 100644 docs/api.md diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index e19a98cf4..aaf666ff5 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -4,6 +4,10 @@ Thank you for considering contributing to Syndica's Sig project! We appreciate y Before you start contributing, please take a moment to read and understand this Contributing Guidelines document. It will help you get started and ensure a smooth collaboration process. +## Dev Environment Setup + +See the [readme](../readme.md#-setup). 
+ ## Style Guide ### Optional Values diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 000000000..be84a835b --- /dev/null +++ b/docs/api.md @@ -0,0 +1,2434 @@ + +### `core.Pubkey` - API Reference + +A struct which holds a Public Key of a Solana account (`[32]u8`). + +
+ +From a string: + +```zig +const Pubkey = @import("sig").core.Pubkey; + +fn main() !void { + + const pubkey = try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"); + +} +``` + +
+ +From raw bytes: + +```zig +const Pubkey = @import("sig").core.Pubkey; + +fn main() !void { + + // Automatically encodes and caches the string value + const pubkey = try Pubkey.fromBytes( + &[32]u8{ + 44, 64, 232, 153, 35, 67, 7, 9, 46, 6, 87, 76, 55, 55, 65, 5, + 99, 0, 48, 64, 75, 8, 127, 53, 57, 12, 7, 54, 8, 133, 246, 4, + }, + .{}, + ); + + + // Optionally skip encoding: if (in the rare scenario) you will never call the string() method, you can + // set this option to true and it will not encode & cache the string value. This can be helpful in + // scenarios where you plan to only use the bytes and want to save on expensive base58 encoding. + const pubkey = try Pubkey.fromBytes( + &[32]u8{ + 44, 64, 232, 153, 35, 67, 7, 9, 46, 6, 87, 76, 55, 55, 65, 5, + 99, 0, 48, 64, 75, 8, 127, 53, 57, 12, 7, 54, 8, 133, 246, 4, + }, + .{ .skip_encoding = true }, + ); + +} +``` + +
+ +### `rpc.Client` - API Reference + +
+ +A struct which allows you to interact with a Solana cluster via JSON RPC. You can instantiate a client like so: + +```zig +const rpc = @import("sig").rpc; + +const HTTP_ENDPOINT = "https://api.mainnet-beta.solana.com"; + +fn main() !void { + var customHeaders = [_][2][]const u8{ + .{ "Cache-Control", "no-cache" }, + .{ "Authorization", "Bearer " }, + }; + + var client = try rpc.Client.init(allocator, .{ + .http_endpoint = HTTP_ENDPOINT, + .http_headers = &customHeaders, + }); + defer client.deinit(); +} +``` + +
+
+ +
+getAccountInfo - Returns all information associated with the account of provided Pubkey +
+ +**Params:** (address: Pubkey, options: GetAccountInfoOptions) +
+ +**Options** +
+ +```zig +const GetAccountInfoOptions = struct { + commitment: ?types.Commitment = null, + encoding: types.Encoding = .Base64, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; +const Pubkey = sig.core.Pubkey; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + const pubkey = try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"); + + var resp = try client.getAccountInfo(pubkey, .{ .encoding = .Base64 }); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("Account info: {any}", .{resp.result().value.data}); +} +``` + +
+ +
+getBalance - Returns the balance of the account of provided Pubkey +
+ +**Params:** (pubkey: Pubkey) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; +const Pubkey = sig.core.Pubkey; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + const pubkey = try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"); + + var resp = try client.getBalance(pubkey); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("balance info: {any}", .{resp.result().value}); +} +``` + +
+ +
+getBlockHeight - Returns the current block height of the node +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlockHeight(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("block height: {any}", .{resp.result()}); +} +``` + +
+ +
+getBlock - Returns identity and transaction information about a confirmed block in the ledger +
+ +**Params:** (slot: u64, options: GetBlockOptions) + +
+ +**Options** +
+ +```zig +const GetBlockOptions = struct { + commitment: ?types.Commitment = null, + maxSupportedTransactionVersion: i64 = 0, + transactionDetails: []const u8 = "full", + rewards: bool = false, + /// NOTE: must be json for now + encoding: types.Encoding = .Json, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlock(500, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("block info: {any}", .{resp.result()}); +} +``` + +
+ +
+getBlockProduction - Returns recent block production information from the current or previous epoch. +
+ +**Params:** (options: GetBlockProductionOptions) + +
+ +**Options** +
+ +```zig +const GetBlockProductionOptions = struct { + commitment: ?types.Commitment = null, + identity: ?[]const u8 = null, + range: ?struct { + firstSlot: u64, + lastSlot: ?u64, + } = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlockProduction(.{ .identity = "1EWZm7aZYxfZHbyiELXtTgN1yT2vU1HF9d8DWswX2Tp" }); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("block production info: {any}", .{resp.result()}); +} +``` + +
+ +
+getBlockCommitment - Returns commitment for particular block +
+ +**Params:** (slot: u64) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlockCommitment(400); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("block commitment info: {any}", .{resp.result()}); +} +``` + +
+ +
+getBlocks - Returns a list of confirmed blocks between two slots. + +
+ +**Params:** (startSlot: u64, endSlot: ?u64, options: GetBlocksOptions) + +
+ +**Options** +
+ +```zig +const GetBlocksOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlocks(400, 500, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("blocks: {any}", .{resp.result()}); +} +``` + +
+ +
+getBlocksWithLimit - Returns a list of confirmed blocks starting at the given slot +
+ +**Params:** (startSlot: u64, limit: ?u64, options: GetBlocksOptions) + +
+ +**Options** +
+ +```zig +const GetBlocksOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlocksWithLimit(400, 25, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("blocks: {any}", .{resp.result()}); +} +``` + +
+ +
+getBlockTime - Returns the estimated production time of a block +
+ +**Params:** (slot: u64) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getBlockTime(163954396); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("block time: {any}", .{resp.result()}); +} +``` + +
+ +
+getClusterNodes - Returns information about all the nodes participating in the cluster +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getClusterNodes(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("cluster nodes: {any}", .{resp.result()}); +} +``` + +
+ +
+getEpochInfo - Returns information about the current epoch +
+ +**Params:** (options: GetEpochInfoOptions) + +
+ +**Options** +
+ +```zig +const GetEpochInfoOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getEpochInfo(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("epoch info: {any}", .{resp.result()}); +} +``` + +
+ +
+getEpochSchedule - Returns the epoch schedule information from this cluster +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getEpochSchedule(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("epoch schedule: {any}", .{resp.result()}); +} +``` + +
+ +
+getFeeForMessage - Get the fee the network will charge for a particular Message +
+ +**Params:** (message: []const u8, options: GetFeeForMessageOptions) + +
+ +**Options** +
+ +```zig +const GetFeeForMessageOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getFeeForMessage("AQABAgIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEBAQAA", .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("message fee info: {any}", .{resp.result()}); +} +``` + +
+ +
+getFirstAvailableBlock - Returns the slot of the lowest confirmed block that has not been purged from the ledger +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getFirstAvailableBlock(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("first available block: {any}", .{resp.result()}); +} +``` + +
+ +
+getGenesisHash - Returns the genesis hash +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getGenesisHash(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("genesis hash: {any}", .{resp.result()}); +} +``` + +
+ +
+getHealth - Returns the current health of the node +
+ +_NOTE:_ If one or more --known-validator arguments are provided to solana-validator - "ok" is returned when the node is within HEALTH_CHECK_SLOT_DISTANCE slots of the highest known validator, otherwise an error is returned. "ok" is always returned if no known validators are provided. + +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getHealth(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("health: {any}", .{resp.result()}); +} +``` + +
+ +
+getHighestSnapshotSlot - Returns the highest slot information that the node has snapshots for +
+ +_NOTE:_ This will find the highest full snapshot slot, and the highest incremental snapshot slot based on the full snapshot slot, if there is one. + +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getHighestSnapshotSlot(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("snapshot info: {any}", .{resp.result()}); +} +``` + +
+ +
+getIdentity - Returns the identity pubkey for the current node +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getIdentity(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("identity info: {any}", .{resp.result()}); +} +``` + +
+ +
+getInflationGovernor - Returns the current inflation governor +
+ +**Params:** (options: GetInflationGovernorOptions) + +
+ +**Options** +
+ +```zig +const GetInflationGovernorOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getInflationGovernor(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("inflation info: {any}", .{resp.result()}); +} +``` + +
+ +
+getInflationRate - Returns the specific inflation values for the current epoch +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getInflationRate(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("inflation rate: {any}", .{resp.result()}); +} +``` + +
+ +
+getInflationReward - Returns the inflation / staking reward for a list of addresses for an epoch +
+ +**Params:** (accounts: []Pubkey, options: GetInflationRewardOptions) + +
+ +**Options** +
+ +```zig +const GetInflationRewardOptions = struct { + commitment: ?types.Commitment = null, + epoch: ?u64 = null, + minContextSlot: ?u64 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var accounts = [2]Pubkey{ + try Pubkey.fromString( + "6dmNQ5jwLeLk5REvio1JcMshcbvkYMwy26sJ8pbkvStu", + ) , + try Pubkey.fromString( + "BGsqMegLpV6n6Ve146sSX2dTjUMj3M92HnU8BbNRMhF2", + ), + }; + var resp = try client.getInflationReward(&accounts, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("inflation reward info: {any}", .{resp.result()}); +} +``` + +
+ +
+getLargestAccounts - Returns the 20 largest accounts, by lamport balance (results may be cached up to two hours) +
+ +**Params:** (options: GetLargestAccountsOptions) + +
+ +**Options** +
+ +```zig +const GetLargestAccountsOptions = struct { + commitment: ?types.Commitment = null, + filter: ?enum { Circulating, NonCirculating } = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getLargestAccounts(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("largest accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+getLatestBlockhash - Returns the latest blockhash +
+ +**Params:** (options: GetLatestBlockhashOptions) + +
+ +**Options** +
+ +```zig +const GetLatestBlockhashOptions = struct { + commitment: ?types.Commitment = null, + minContextSlot: ?u64 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getLatestBlockhash(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("latest blockhash: {any}", .{resp.result()}); +} +``` + +
+ +
+getLeaderSchedule - Returns the leader schedule for an epoch +
+ +**Params:** (epoch: ?u64, options: GetLeaderScheduleOptions) + +
+ +**Options** +
+ +```zig +const GetLeaderScheduleOptions = struct { + commitment: ?types.Commitment = null, + identity: ?[]const u8 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getLeaderSchedule(null, .{ .identity = "GRmtMtAeSL8HgX1p815ATQjaYU4Sk7XCP21i4yoFd3KS" }); + // defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("leader schedule: {any}", .{resp.result()}); +} +``` + +
+ +
+getMaxRetransmitSlot - Get the max slot seen from retransmit stage +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getMaxRetransmitSlot(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("max retransmit slot: {any}", .{resp.result()}); +} +``` + +
+ +
+getMaxShredInsertSlot - Get the max slot seen from after shred insert +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getMaxShredInsertSlot(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("max shred insert slot: {any}", .{resp.result()}); +} +``` + +
+ +
+getMinimumBalanceForRentExemption - Returns minimum balance required to make account rent exempt +
+ +**Params:** (size: usize) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getMinimumBalanceForRentExemption(1000); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("minimum balance: {any}", .{resp.result()}); +} +``` + +
+ +
+getMultipleAccounts - Returns the account information for a list of Pubkeys +
+ +**Params:** (pubkeys: []Pubkey, options: GetMultipleAccountsOptions) + +
+ +**Options** +
+ +```zig +const GetMultipleAccountsOptions = struct { + commitment: ?types.Commitment = null, + encoding: types.Encoding = .Base64, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var accounts2 = [2]Pubkey{ + try Pubkey.fromString( + "4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa", + ), + try Pubkey.fromString( + "BGsqMegLpV6n6Ve146sSX2dTjUMj3M92HnU8BbNRMhF2", + ), + }; + var resp = try client.getMultipleAccounts(&accounts2, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("multiple accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+getProgramAccounts - Returns all accounts owned by the provided program Pubkey +
+ +**Params:** (pubkey: Pubkey, options: GetProgramAccountsOptions) + +
+ +**Options** +
+ +```zig +pub const GetProgramAccountsOptions = struct { + commitment: ?types.Commitment = null, + /// NOTE: this needs to be Base64 if you want to convert the result to the `core.Account` type + encoding: types.Encoding = .Base64, + minContextSlot: ?u64 = null, + /// NOTE: needs to be true + withContext: bool = true, + dataSlice: ?DataSlice = null, + filters: ?[]Filter = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var filters = [1]Filter{.{ .memcmp = .{ .offset = 0, .bytes = "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v" } }}; + var resp = try client.getProgramAccounts( + try Pubkey.fromString("TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"), + .{ .filters = &filters }, + ); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("program accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+getRecentPerformanceSamples - Returns a list of recent performance samples, in reverse slot order +
+ +_NOTE:_ Performance samples are taken every 60 seconds and include the number of transactions and slots that occur in a given time window. + +**Params:** (limit: ?u64) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getRecentPerformanceSamples(null); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("recent performance samples: {any}", .{resp.result()}); +} +``` + +
+ +
+getRecentPrioritizationFees - Returns a list of prioritization fees from recent blocks +
+ +**Params:** (pubkeys: ?[]Pubkey) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getRecentPrioritizationFees(null); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("recent prioritization fees: {any}", .{resp.result()}); +} +``` + +
+ +
+getSignaturesForAddress - Returns signatures for confirmed transactions that include the given address in their accountKeys list +
+ +_NOTE:_ Returns signatures backwards in time from the provided signature or most recent confirmed block. + +**Params:** (pubkey: Pubkey, options: GetSignaturesForAddressOptions) + +
+ +**Options** +
+ +```zig +pub const GetSignaturesForAddressOptions = struct { + commitment: ?types.Commitment = null, + minContextSlot: ?u64 = null, + limit: u32 = 1000, + before: ?[]const u8 = null, + until: ?[]const u8 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getSignaturesForAddress(try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"), .{ .limit = 10 }); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("signatures: {any}", .{resp.result()}); +} +``` + +
+ +
+getSignatureStatuses - Returns the statuses of a list of signatures +
+ +**Params:** (signatures: [][]const u8, options: GetSignatureStatusesOptions) + +
+ +**Options** +
+ +```zig +const GetSignatureStatusesOptions = struct { + searchTransactionHistory: bool = false, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var signatures = [2][]const u8{ + "3oK4vMqnRbLhdNVq9Cb81JwHim4QaxvgcNEyA4jTySFFtFtBhJgmLwT3rMFAGakKHE9iMiymVNZsTbnrzNjuxXJc", + "5fqHdfeY1GbshDFzTdybqDbR3mwj5tkgHEP28dFWFZDcvQkkJUynVWrsfMYip8SsfAaFYTFmRdeC3K1CQRC7Ukkb", + }; + var resp = try client.getSignatureStatuses(&signatures, .{ .searchTransactionHistory = true }); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("signature statuses: {any}", .{resp.result()}); +} +``` + +
+ +
+getSlotLeader - Returns the current slot leader +
+ +**Params:** (options: GetSlotLeaderOptions) + +
+ +**Options** +
+ +```zig +const GetSlotLeaderOptions = struct { + commitment: ?types.Commitment = null, + minContextSlot: ?u64 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getSlotLeader(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("slot leader: {any}", .{resp.result()}); +} +``` + +
+ +
+getSlotLeaders - Returns the slot leaders for a given slot range +
+ +**Params:** (startSlot: ?u64, limit: ?u64) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getSlotLeaders(193536000, 10); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("slot leaders: {any}", .{resp.result()}); +} +``` + +
+ +
+getStakeActivation - Returns epoch activation information for a stake account +
+ +**Params:** (pubkey: Pubkey, options: GetStakeActivationOptions) + +
+ +**Options** +
+ +```zig +pub const GetStakeActivationOptions = struct { + commitment: ?types.Commitment = null, + minContextSlot: ?u64 = null, + epoch: ?u64 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getStakeActivation(try Pubkey.fromString( + "CWrKSEDYhj6VHGocZowq2BUncKESqD7rdLTSrsoasTjU", + ), .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("stake activation: {any}", .{resp.result()}); +} +``` + +
+ +
+getStakeMinimumDelegation - Returns the stake minimum delegation, in lamports +
+ +**Params:** (options: GetStakeMinimumDelegationOptions) + +
+ +**Options** +
+ +```zig +const GetStakeMinimumDelegationOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getStakeMinimumDelegation(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("min stake delegation: {any}", .{resp.result()}); +} +``` + +
+ +
+getSupply - Returns information about the current supply +
+ +**Params:** (options: GetSupplyOptions) + +
+ +**Options** +
+ +```zig +const GetSupplyOptions = struct { + commitment: ?types.Commitment = null, + excludeNonCirculatingAccountsList: ?bool = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getSupply(.{ .excludeNonCirculatingAccountsList = false }); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("get supply: {any}", .{resp.result()}); +} +``` + +
+ +
+getTokenAccountBalance - Returns the token balance of an SPL Token account +
+ +**Params:** (pubkey: Pubkey, options: GetTokenAccountBalanceOptions) + +
+ +**Options** +
+ +```zig +const GetTokenAccountBalanceOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var pubkey = try Pubkey.fromString( + "6A5NHCj1yF6urc9wZNe6Bcjj4LVszQNj5DwAWG97yzMu", + ); + var resp = try client.getTokenAccountBalance(pubkey, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("token account balance: {any}", .{resp.result()}); +} +``` + +
+ +
+getTokenAccountsByDelegate - Returns all SPL Token accounts by approved Delegate +
+ +**Params:** (pubkey: Pubkey, mintOrProgramId: MintOrProgramIdParam, options: GetTokenAccountsByDelegateOptions) + +
+ +**Options** +
+ +```zig +const MintOrProgramIdParam = struct { + mint: ?Pubkey = null, + programId: ?Pubkey = null, +}; + +const GetTokenAccountsByDelegateOptions = struct { + commitment: ?types.Commitment = null, + encoding: types.Encoding = .Base64, + minContextSlot: ?u64 = null, + dataSlice: ?DataSlice = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var programPubkey = try Pubkey.fromString( + "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA", + ); + var pubkey = try Pubkey.fromString( + "CTz5UMLQm2SRWHzQnU62Pi4yJqbNGjgRBHqqp6oDHfF7", + ); + var resp = try client.getTokenAccountsByDelegate(pubkey, .{ .programId = programPubkey }, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("token accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+getTokenAccountsByOwner - Returns all SPL Token accounts by token owner +
+ +**Params:** (pubkey: Pubkey, mintOrProgramId: MintOrProgramIdParam, options: GetTokenAccountsByOwnerOptions) + +
+ +**Options** +
+ +```zig +const MintOrProgramIdParam = struct { + mint: ?Pubkey = null, + programId: ?Pubkey = null, +}; + +const GetTokenAccountsByOwnerOptions = struct { + commitment: ?types.Commitment = null, + encoding: types.Encoding = .Base64, + minContextSlot: ?u64 = null, + dataSlice: ?DataSlice = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var mintPubkey = try Pubkey.fromString( + "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v", + ); + var pubkey = try Pubkey.fromString( + "CTz5UMLQm2SRWHzQnU62Pi4yJqbNGjgRBHqqp6oDHfF7", + ); + var resp = try client.getTokenAccountsByOwner(pubkey, .{ .mint = mintPubkey }, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("token accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+getTokenLargestAccounts - Returns the 20 largest accounts of a particular SPL Token type +
+ +**Params:** (pubkey: Pubkey, options: GetTokenLargestAccountsOptions) + +
+ +**Options** +
+ +```zig +const GetTokenLargestAccountsOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var mintPubkey = try Pubkey.fromString( + "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v", + ); + var resp = try client.getTokenLargestAccounts(mintPubkey, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("token largest accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+getTokenSupply - Returns the total supply of an SPL Token type +
+ +**Params:** (pubkey: Pubkey, options: GetTokenSupplyOptions) + +
+ +**Options** +
+ +```zig +const GetTokenSupplyOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var mintPubkey = try Pubkey.fromString( + "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v", + ); + var resp = try client.getTokenSupply(mintPubkey, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("token supply: {any}", .{resp.result()}); +} +``` + +
+ +
+getTransaction - Returns transaction details for a confirmed transaction +
+ +**Params:** (signature: []const u8, options: GetTransactionOptions) + +
+ +**Options** +
+ +```zig +const GetTransactionOptions = struct { + commitment: ?types.Commitment = null, + maxSupportedTransactionVersion: u8 = 0, + /// NOTE: must be Json for now + encoding: types.Encoding = .Json, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var txSig = "5UfDuX7WXY18keiz9mZ6zKkY8JyNuLDFz2QycQcr7skRkgVaNmo6tgFbsePRrX5C6crvycJ2A3txSdGgjPHvPbTZ"; + var resp = try client.getTransaction(txSig, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("transaction: {any}", .{resp.result()}); +} +``` + +
+ +
+getTransactionCount - Returns the current Transaction count from the ledger +
+ +**Params:** (options: GetTransactionCountOptions) + +
+ +**Options** +
+ +```zig +const GetTransactionCountOptions = struct { + commitment: ?types.Commitment = null, + minContextSlot: ?u64 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getTransactionCount(.{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("transaction count: {any}", .{resp.result()}); +} +``` + +
+ +
+getVersion - Returns the current Solana version running on the node +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.getVersion(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("version: {any}", .{resp.result()}); +} +``` + +
+ +
+getVoteAccounts - Returns the account info and associated stake for all the voting accounts in the current bank +
+ +**Params:** (options: GetVoteAccountsOptions) + +
+ +**Options** +
+ +```zig +const GetVoteAccountsOptions = struct { + commitment: ?types.Commitment = null, + votePubkey: ?Pubkey = null, + keepUnstakedDelinquents: ?bool = false, + delinquentSlotDistance: ?u64 = 0, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var votePubkey = try Pubkey.fromString( + "CertusDeBmqN8ZawdkxK5kFGMwBXdudvWHYwtNgNhvLu", + ); + var resp = try client.getVoteAccounts(.{ .votePubkey = votePubkey }); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("vote accounts: {any}", .{resp.result()}); +} +``` + +
+ +
+isBlockhashValid - Returns whether a blockhash is still valid or not +
+ +**Params:** (blockhash: []const u8, options: IsBlockhashValidOptions) + +
+ +**Options** +
+ +```zig +pub const IsBlockhashValidOptions = struct { + commitment: ?types.Commitment = null, + minContextSlot: ?u64 = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.isBlockhashValid("AaPs8sYJjnDLMMAADYj2fPyDyNzp9to9v4J6c5gevxpX", .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("blockhash valid: {any}", .{resp.result()}); +} +``` + +
+ +
+minimumLedgerSlot - Returns the lowest slot that the node has information about in its ledger +
+ +**Params:** None + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.minimumLedgerSlot(); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debugf("minimum ledger slot: {any}", .{resp.result()}); +} +``` + +
+ +
+requestAirdrop - Requests an airdrop of lamports to a Pubkey +
+ +**Params:** (pubkey: Pubkey, lamports: u64, options: RequestAirdropOptions) + +
+ +**Options** +
+ +```zig +const RequestAirdropOptions = struct { + commitment: ?types.Commitment = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var pubkey = try Pubkey.fromString( + "Bvg7GuhqwNmV2JVyeZjhAcTPFqPktfmq25VBaZipozda", + ); + var resp = try client.requestAirdrop(pubkey, 10000, .{}); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("airdrop result: {any}", .{resp.result()}); +} +``` + +
+ +
+sendTransaction - Submits a signed transaction to the cluster for processing +
+ +_NOTE:_ +This method does not alter the transaction in any way; it relays the transaction created by clients to the node as-is. + +If the node's rpc service receives the transaction, this method immediately succeeds, without waiting for any confirmations. A successful response from this method does not guarantee the transaction is processed or confirmed by the cluster. + +While the rpc service will reasonably retry to submit it, the transaction could be rejected if the transaction's recent_blockhash expires before it lands. + +Use getSignatureStatuses to ensure a transaction is processed and confirmed. + +Before submitting, the following preflight checks are performed: + +The transaction signatures are verified +The transaction is simulated against the bank slot specified by the preflight commitment. On failure an error will be returned. Preflight checks may be disabled if desired. It is recommended to specify the same commitment and preflight commitment to avoid confusing behavior. +The returned signature is the first signature in the transaction, which is used to identify the transaction (transaction id). This identifier can be easily extracted from the transaction data before submission. + +**Params:** (encoded: []const u8) + +
+ +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.sendTransaction( + "4hXTCkRzt9WyecNzV1XPgCDfGAZzQKNxLXgynz5QDuWWPSAZBZSHptvWRL3BjCvzUXRdKvHL2b7yGrRQcWyaqsaBCncVG7BFggS8w9snUts67BSh3EqKpXLUm5UMHfD7ZBe9GhARjbNQMLJ1QD3Spr6oMTBU6EhdB4RD8CP2xUxr2u3d6fos36PD98XS6oX8TQjLpsMwncs5DAMiD4nNnR8NBfyghGCWvCVifVwvA8B8TJxE1aiyiv2L429BCWfyzAme5sZW8rDb14NeCQHhZbtNqfXhcp2tAnaAT", + .{}, + ); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("tx signature: {any}", .{resp.result()}); +} +``` + +
+ +
+simulateTransaction - Simulate sending a transaction +
+ +**Params:** (encoded: []const u8, options: SimulateTransactionOptions) + +
+ +**Options** +
+ +```zig +const SimulateTransactionOptions = struct { + commitment: ?types.Commitment = null, + /// NOTE: must be base64 for now + encoding: types.Encoding = .Base64, + sigVerify: ?bool = null, + replaceRecentBlockhash: ?[]const u8 = null, + minContextSlot: ?u64 = null, + accounts: ?struct { + addresses: []Pubkey, + /// NOTE: must be base64 for now + encoding: types.Encoding = .Base64, + } = null, +}; +``` + +**Usage** +
+ +```zig +const std = @import("std"); +const sig = @import("sig"); +const rpc = sig.rpc; + + +const allocator = std.heap.page_allocator; + +pub fn main() !void { + var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); + defer client.deinit(); + + var resp = try client.simulateTransaction( + "AdYOLBh+RlElmqIY08YZ4OvkWzmGz5ccQLKOENWccchuSluWO7ZTy6B4x/A/WJAFvSFfUhXEcG/PZajL5EmZBQMBAAEDb3Q4CUF/hTg/MgAsYv45KRoWu+9GafjMndSktv5KzQ3fydC+bF4RL7cMFn8iCnd9sKVJp3K3PwOxVZ3agBBUWAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjkczsB8wv5yFAgAKUdvb4irHybi2IEEHJcAJrfdhMfgBAgIAAQwCAAAAgJaYAAAAAAA=", + .{}, + ); + defer resp.deinit(); + + if (resp.err()) |err| { + std.log.err("error response: {any}", .{err}); + return; + } + + std.log.debug("simulate tx info: {any}", .{resp.result()}); +} +``` + +
diff --git a/readme.md b/readme.md index 63309004c..8e7c09add 100644 --- a/readme.md +++ b/readme.md @@ -12,25 +12,27 @@


-_Sig_ is a Solana validator client implementation written in Zig. +_Sig_ is a Solana validator client implementation written in Zig. Read the [introductory blog post](https://blog.syndica.io/introducing-sig-by-syndica-an-rps-focused-solana-validator-client-written-in-zig/) for more about the goals of this project.

⚠️ NOTE: This is a WIP, please open any issues for any bugs/improvements. -## 📦 Setup +## Development + +### 📦 Setup Zig 0.11.0 is required to build Sig. -
Details +
Dev Environment Recommendations -### Build Dependencies +#### Build Dependencies - Zig 0.11.0 - Choose one: - [Binary Releases](https://ziglang.org/download/) (extract and add to PATH) - [Install with a package manager](https://github.com/ziglang/zig/wiki/Install-Zig-from-a-Package-Manager) - Manage multiple versions with [zigup](https://github.com/marler8997/zigup) or [zvm](https://www.zvm.app/) -### Developer Tools +#### Developer Tools These tools are optional but recommended for a smooth development process. - [Zig Language Server (ZLS) 0.11.0](https://github.com/zigtools/zls/wiki/Installation) @@ -38,7 +40,7 @@ These tools are optional but recommended for a smooth development process. - [Zig Language](https://marketplace.visualstudio.com/items?itemName=ziglang.vscode-zig) VS Code extension - [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb) VS Code extension -#### Visual Studio Code +##### Visual Studio Code If you use VS Code, you should install the [Zig Language](https://marketplace.visualstudio.com/items?itemName=ziglang.vscode-zig) extension. It can use your installed versions of Zig and ZLS, or it can download and manage its own internal versions. @@ -81,7 +83,7 @@ You can use [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadim ] } ``` -

+
To run the debugger, you need a run configuration. This launch.json includes an example for debugging gossip. Customize the args as desired. @@ -103,19 +105,45 @@ To run the debugger, you need a run configuration. This launch.json includes an ] } ``` -
+ -## 🔧 Build +### 🔧 Build ```bash zig build ``` -## ▶️ Usage +### 🧪 Test +Run all tests. +```bash +zig build test +``` + +Include `--summary all` with any test command to see a summary of the test results. + +Include a filter to limit which tests are run. Sig tests include their module name. For example, you can run all tests in `gossip.crds_table` like this: +```bash +zig build test --summary all -- gossip.crds_table +``` + +### 📊 Benchmark +Run all benchmarks. +```bash +zig build benchmark +``` + +Run a benchmark group: socket_utils, gossip, or sync. +```bash +zig build benchmark -- gossip +``` + +### Code +See [CONTRIBUTING.md](docs/CONTRIBUTING.md) for the code style guide. +

-### 🚀 Run Sig +## 🚀 Run Run Sig with `zig` or execute the binary you already built: ```bash @@ -125,8 +153,9 @@ zig build run -- --help ./zig-out/bin/sig --help ``` -For simplicity, the above commands will be abbreviated as `sig` in the rest of this document. An alias can be used to realize this abbreviation in your shell. +These commands are abbreviated as `sig` in this document. An alias can be used to implement this abbreviation in your shell. ```bash +# run in repository root alias sig="$(pwd)/zig-out/bin/sig" ``` @@ -172,68 +201,14 @@ sig gossip --entrypoint 35.197.53.105:8001 \ ```bash sig gossip --entrypoint 35.203.170.30:8001 \ - --entrypoint 139.178.68.207:8001 \ --entrypoint 139.178.68.207:8001 ``` +
+## 📦 Import Sig -### 🧪 Testing -```bash -zig build test --summary all -``` - -## Learn More -[Zig](https://ziglang.org/) -- [Documentation](https://ziglang.org/documentation/0.11.0/) -- [Ziglearn Book](https://ziglearn.org/) -- [Ziglings Exercises](https://github.com/ratfactor/ziglings) - -[Solana](https://solana.com/) -- [Documentation](https://docs.solana.com/validator/anatomy) -- [Code](https://github.com/solana-labs/solana) - -Sig -- [Introduction](https://blog.syndica.io/introducing-sig-by-syndica-an-rps-focused-solana-validator-client-written-in-zig/) -- [Gossip Deep Dive](https://blog.syndica.io/sig-engineering-1-gossip-protocol/) - -## Why Zig? - -Zig's own definition: `Zig is a general-purpose programming language and toolchain for maintaining robust, optimal and reusable software.` - -1. **Optimized performance**: Zig provides control over how your program runs at a low level, similar to languages like C. It allows fine-grained control over aspects such as memory management and system calls, which can lead to improved performance. - -2. **Safety focus**: Zig has features built in to prevent common bugs and safety issues common in C. For example, it includes built-in testing and bounds checking, which can help avoid problems such as buffer overflows and undefined behavior. - -3. **Readability and maintainability**: Zig syntax is designed to be straightforward and clear. This can make the code easier to understand, more maintainable, and less prone to bugs. - -4. **No hidden control flow**: Zig doesn't allow hidden control-flow like exceptions in some other languages. This can make it easier to reason about what your program is doing. - -5. **Integration with C**: Zig has excellent interoperation with C. You can directly include C libraries and headers in a Zig program, which can save time when using existing C libraries. - -6. **Custom allocators**: Zig allows you to define custom memory allocation strategies for different parts of your program. 
This provides the flexibility to optimize memory usage for specific use-cases or workloads. - -## Notes: - -- Zig is still a evolving language. -- Many of the low-level APIs have been stabilized but `std.http.Client` and `std.json` are still WIP targetting stable implementations by `>=0.11`. -- This library was compiled and tested using `0.11.0-dev.3997+546212ff7` (master). -- Zig is targeting end of Q2 2023 for [`0.11` milestone](https://github.com/ziglang/zig/milestone/17). -- Currently, `std.http.Client` [leaks](https://github.com/ziglang/zig/blob/447a30299073ce88b7b26d18d060a345beac5276/lib/std/http/Client.zig#L913) and is failing some tests, fix is in works. -
-
- -## Modules: - -- **Gossip** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` - -- **Core** - Core data structures shared across modules - -- **RPC Client** - A fully featured HTTP RPC client with ability to query all on-chain data along with sending transactions - -## Installation - -Add `Sig` to your Zig project using `build.zig.zon` file (available for Zig >= 0.11). +Sig can be included as a dependency in your Zig project using a `build.zig.zon` file (available for Zig >= 0.11). See the [API documentation](docs/api.md) to learn more about how to use Sig as a library.
Steps - how to install Sig in your Zig project @@ -303,2440 +278,52 @@ Add `Sig` to your Zig project using `build.zig.zon` file (available for Zig >= 0 ```
-
- -## Usage - -### `core.Pubkey` - API Reference - -A struct which holds a Public Key of a Solana account (`[32]u8`). - -
- -From a string: - -```zig -const Pubkey = @import("sig").core.Pubkey; - -fn main() !void { - - const pubkey = try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"); - -} -``` - -
- -From raw bytes: - -```zig -const Pubkey = @import("sig").core.Pubkey; - -fn main() !void { - - // Automatically encodes and caches the string value - const pubkey = try Pubkey.fromBytes( - &[32]u8{ - 44, 64, 232, 153, 35, 67, 7, 9, 46, 6, 87, 76, 55, 55, 65, 5, - 99, 0, 48, 64, 75, 8, 127, 53, 57, 12, 7, 54, 8, 133, 246, 4, - }, - .{}, - ); - - - // Optionally skip encoding if (in the rare scenario) you will never call the string() method, you can - // set this option to true and it will not decode & cache the encoded value. This can be helpful in - // scenarios where you plan to only use the bytes and want to save on expensive base58 encoding. - const pubkey = try Pubkey.fromBytes( - &[32]u8{ - 44, 64, 232, 153, 35, 67, 7, 9, 46, 6, 87, 76, 55, 55, 65, 5, - 99, 0, 48, 64, 75, 8, 127, 53, 57, 12, 7, 54, 8, 133, 246, 4, - }, - .{ .skip_encoding = true }, - ); - -} -``` - -
- -### `rpc.Client` - API Reference - -
- -A struct which allows you to interact with a Solana cluster via JSON RPC. You can instantiate a client like so: - -```zig -const rpc = @import("sig").rpc; - -const HTTP_ENDPOINT = "https://api.mainnet-beta.solana.com"; - -fn main() !void { - var customHeaders = [_][2][]const u8{ - .{ "Cache-Control", "no-cache" }, - .{ "Authorization", "Bearer " }, - }; - - var client = try rpc.Client.init(allocator, .{ - .http_endpoint = HTTP_ENDPOINT, - .http_headers = &customHeaders, - }); - defer client.deinit(); -} -``` - -
-
- -
-getAccountInfo - Returns all information associated with the account of provided Pubkey -
- -**Params:** (address: Pubkey, options: GetAccountInfoOptions) -
- -**Options** -
- -```zig -const GetAccountInfoOptions = struct { - commitment: ?types.Commitment = null, - encoding: types.Encoding = .Base64, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; -const Pubkey = sig.core.Pubkey; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - const pubkey = try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"); - - var resp = try client.getAccountInfo(pubkey, .{ .encoding = .Base64 }); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("Account info: {any}", .{resp.result().value.data}); -} -``` - -
- -
-getBalance - Returns the balance of the account of provided Pubkey -
- -**Params:** (pubkey: Pubkey) - -
+
-**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; -const Pubkey = sig.core.Pubkey; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - const pubkey = try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"); - - var resp = try client.getBalance(pubkey); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("balance info: {any}", .{resp.result().value}); -} -``` - -
- -
-getBlockHeight - Returns the current block height of the node -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getBlockHeight(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("block height: {any}", .{resp.result()}); -} -``` - -
- -
-getBlock - Returns identity and transaction information about a confirmed block in the ledger -
- -**Params:** (slot: u64, options: GetBlockOptions) - -
- -**Options** -
- -```zig -const GetBlockOptions = struct { - commitment: ?types.Commitment = null, - maxSupportedTransactionVersion: i64 = 0, - transactionDetails: []const u8 = "full", - rewards: bool = false, - /// NOTE: must be json for now - encoding: types.Encoding = .Json, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getBlock(500, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("block info: {any}", .{resp.result()}); -} -``` - -
- -
-getBlockProduction - Returns recent block production information from the current or previous epoch. -
- -**Params:** (options: GetBlockOptions) - -
- -**Options** -
- -```zig -const GetBlockProductionOptions = struct { - commitment: ?types.Commitment = null, - identity: ?[]const u8 = null, - range: ?struct { - firstSlot: u64, - lastSlot: ?u64, - } = null, -}; -``` +## 🤔 Why Zig? -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getBlockProduction(.{ .identity = "1EWZm7aZYxfZHbyiELXtTgN1yT2vU1HF9d8DWswX2Tp" }); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("block production info: {any}", .{resp.result()}); -} -``` - -
- -
-getBlockCommitment - Returns commitment for particular block -
- -**Params:** (slot: u64) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getBlockCommitment(400); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("block commitment info: {any}", .{resp.result()}); -} -``` - -
- -
-getBlocks - Returns a list of confirmed blocks between two slots. - -
- -**Params:** (startSlot: u64, endSlot: ?u64, options: GetBlocksOptions) +Zig's own definition: `Zig is a general-purpose programming language and toolchain for maintaining robust, optimal and reusable software.` -
+1. **Optimized performance**: Zig provides control over how your program runs at a low level, similar to languages like C. It allows fine-grained control over aspects such as memory management and system calls, which can lead to improved performance. -**Options** -
+2. **Safety focus**: Zig has features built in to prevent common bugs and safety issues common in C. For example, it includes built-in testing and bounds checking, which can help avoid problems such as buffer overflows and undefined behavior. -```zig -const GetBlocksOptions = struct { - commitment: ?types.Commitment = null, -}; -``` +3. **Readability and maintainability**: Zig syntax is designed to be straightforward and clear. This can make the code easier to understand, more maintainable, and less prone to bugs. -**Usage** -
+4. **No hidden control flow**: Zig doesn't allow hidden control-flow like exceptions in some other languages. This can make it easier to reason about what your program is doing. -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; +5. **Integration with C**: Zig has excellent interoperation with C. You can directly include C libraries and headers in a Zig program, which can save time when using existing C libraries. +6. **Custom allocators**: Zig allows you to define custom memory allocation strategies for different parts of your program. This provides the flexibility to optimize memory usage for specific use-cases or workloads. -const allocator = std.heap.page_allocator; +### Note -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); +- Zig is still an evolving language. +- Many of the low-level APIs have been stabilized but `std.http.Client` and `std.json` are still WIP targeting stable implementations by `>=0.11`. +- This library was compiled and tested using `0.11.0-dev.3997+546212ff7` (master). +- Currently, `std.http.Client` [leaks](https://github.com/ziglang/zig/blob/447a30299073ce88b7b26d18d060a345beac5276/lib/std/http/Client.zig#L913) and is failing some tests, a fix is in the works. +

- var resp = try client.getBlocks(400, 500, .{}); - defer resp.deinit(); +## 🧩 Modules - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } +- **Gossip** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` - std.log.debugf("blocks: {any}", .{resp.result()}); -} -``` +- **Core** - Core data structures shared across modules -
+- **RPC Client** - A fully featured HTTP RPC client with ability to query all on-chain data along with sending transactions +

-
-getBlocksWithLimit - Returns a list of confirmed blocks starting at the given slot -
+## 📚 Learn More +[Zig](https://ziglang.org/) +- [Official Documentation](https://ziglang.org/documentation/0.11.0/) +- [Ziglearn Book](https://ziglearn.org/) +- [Ziglings Exercises](https://github.com/ratfactor/ziglings) -**Params:** (startSlot: u64, limit: ?u64, options: GetBlocksOptions) +[Solana](https://solana.com/) +- [Validator Anatomy](https://docs.solana.com/validator/anatomy) +- [RPC API](https://docs.solana.com/api) +- [Code](https://github.com/solana-labs/solana) -
- -**Options** -
- -```zig -const GetBlocksOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getBlocksWithLimit(400, 25, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("blocks: {any}", .{resp.result()}); -} -``` - -
- -
-getBlockTime - Returns the estimated production time of a block -
- -**Params:** (slot: u64) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getBlockTime(163954396); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("block time: {any}", .{resp.result()}); -} -``` - -
- -
-getClusterNodes - Returns information about all the nodes participating in the cluster -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getClusterNodes(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("cluster nodes: {any}", .{resp.result()}); -} -``` - -
- -
-getEpochInfo - Returns information about the current epoch -
- -**Params:** (options: GetEpochInfoOptions) - -
- -**Options** -
- -```zig -const GetEpochInfoOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getEpochInfo(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("epoch info: {any}", .{resp.result()}); -} -``` - -
- -
-getEpochSchedule - Returns the epoch schedule information from this cluster -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getEpochSchedule(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("epoch schedule: {any}", .{resp.result()}); -} -``` - -
- -
-getFeeForMessage - Get the fee the network will charge for a particular Message -
- -**Params:** (message: []const u8, options: GetFeeForMessageOptions) - -
- -**Options** -
- -```zig -const GetFeeForMessageOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getFeeForMessage("AQABAgIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEBAQAA", .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("message fee info: {any}", .{resp.result()}); -} -``` - -
- -
-getFirstAvailableBlock - Returns the slot of the lowest confirmed block that has not been purged from the ledger -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getFirstAvailableBlock(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("first available block: {any}", .{resp.result()}); -} -``` - -
- -
-getGenesisHash - Returns the genesis hash -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var resp = try client.getGenesisHash(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("genesis hash: {any}", .{resp.result()}); -} -``` - -
- -
-getHealth - Returns the current health of the node -
- -_NOTE:_ If one or more --known-validator arguments are provided to solana-validator - "ok" is returned when the node has within HEALTH_CHECK_SLOT_DISTANCE slots of the highest known validator, otherwise an error is returned. "ok" is always returned if no known validators are provided. - -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getHealth(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("health: {any}", .{resp.result()}); -} -``` - -
- -
-getHighestSnapshotSlot - Returns the highest slot information that the node has snapshots for -
- -_NOTE:_ This will find the highest full snapshot slot, and the highest incremental snapshot slot based on the full snapshot slot, if there is one. - -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getHighestSnapshotSlot(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("snapshot info: {any}", .{resp.result()}); -} -``` - -
- -
-getIdentity - Returns the identity pubkey for the current node -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getIdentity(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("indentity info: {any}", .{resp.result()}); -} -``` - -
- -
-getInflationGovernor - Returns the current inflation governor -
- -**Params:** (options: GetInflationGovernorOptions) - -
- -**Options** -
- -```zig -const GetInflationGovernorOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getInflationGovernor(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("inflation info: {any}", .{resp.result()}); -} -``` - -
- -
-getInflationRate - Returns the specific inflation values for the current epoch -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getInflationRate(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("inflation rate: {any}", .{resp.result()}); -} -``` - -
- -
-getInflationReward - Returns the inflation / staking reward for a list of addresses for an epoch -
- -**Params:** (accounts: []Pubkey, options: GetInflationRewardOptions) - -
- -**Options** -
- -```zig -const GetInflationRewardOptions = struct { - commitment: ?types.Commitment = null, - epoch: ?u64 = null, - minContextSlot: ?u64 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var accounts = [2]Pubkey{ - try Pubkey.fromString( - "6dmNQ5jwLeLk5REvio1JcMshcbvkYMwy26sJ8pbkvStu", - ) , - try Pubkey.fromString( - "BGsqMegLpV6n6Ve146sSX2dTjUMj3M92HnU8BbNRMhF2", - ), - }; - var resp = try client.getInflationReward(&accounts, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("inflation reward info: {any}", .{resp.result()}); -} -``` - -
- -
-getLargestAccounts - Returns the 20 largest accounts, by lamport balance (results may be cached up to two hours) -
- -**Params:** (options: GetLargestAccountsOptions) - -
- -**Options** -
- -```zig -const GetLargestAccountsOptions = struct { - commitment: ?types.Commitment = null, - filter: ?enum { Circulating, NonCirculating } = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getLargestAccounts(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("largest accounts: {any}", .{resp.result()}); -} -``` - -
- -
-getLatestBlockhash - Returns the latest blockhash -
- - -**Params:** (options: GetLatestBlockhashOptions) - -
- -**Options** -
- -```zig -const GetLatestBlockhashOptions = struct { - commitment: ?types.Commitment = null, - minContextSlot: ?u64 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getLatestBlockhash(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("latest blockhash: {any}", .{resp.result()}); -} -``` - -
- -
-getLeaderSchedule - Returns the leader schedule for an epoch -
- -**Params:** (epoch: ?u64, options: GetLeaderScheduleOptions) - -
- -**Options** -
- -```zig -const GetLeaderScheduleOptions = struct { - commitment: ?types.Commitment = null, - identity: ?[]const u8 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getLeaderSchedule(null, .{ .identity = "GRmtMtAeSL8HgX1p815ATQjaYU4Sk7XCP21i4yoFd3KS" }); - // defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("leader schedule: {any}", .{resp.result()}); -} -``` - -
- -
-getMaxRetransmitSlot - Get the max slot seen from retransmit stage -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getMaxRetransmitSlot(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("max retransmit slot: {any}", .{resp.result()}); -} -``` - -
- -
-getMaxShredInsertSlot - Get the max slot seen from after shred insert -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getMaxShredInsertSlot(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("max shred insert slot: {any}", .{resp.result()}); -} -``` - -
- -
-getMinimumBalanceForRentExemption - Returns minimum balance required to make account rent exempt -
- -**Params:** (size: usize) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getMinimumBalanceForRentExemption(1000); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("minimum balance: {any}", .{resp.result()}); -} -``` - -
- -
-getMultipleAccounts - Returns the account information for a list of Pubkeys -
- -**Params:** (pubkeys: []Pubkey, options: GetMultipleAccountsOptions) - -
- -**Options** -
- -```zig -const GetMultipleAccountsOptions = struct { - commitment: ?types.Commitment = null, - encoding: types.Encoding = .Base64, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var accounts2 = [2]Pubkey{ - try Pubkey.fromString( - "4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa", - ), - try Pubkey.fromString( - "BGsqMegLpV6n6Ve146sSX2dTjUMj3M92HnU8BbNRMhF2", - ), - }; - var resp = try client.getMultipleAccounts(&accounts2, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("multiple accounts: {any}", .{resp.result()}); -} -``` - -
- -
-getProgramAccounts - Returns all accounts owned by the provided program Pubkey -
- -**Params:** (pubkey: Pubkey, options: GetProgramAccountsOptions) - -
- -**Options** -
- -```zig -pub const GetProgramAccountsOptions = struct { - commitment: ?types.Commitment = null, - /// NOTE: this needs to be base64 if you want to convert to the `core.Account` type - encoding: types.Encoding = .Base64, - minContextSlot: ?u64 = null, - /// NOTE: needs to be true - withContext: bool = true, - dataSlice: ?DataSlice = null, - filters: ?[]Filter = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var filters = [1]Filter{.{ .memcmp = .{ .offset = 0, .bytes = "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v" } }}; - var resp = try client.getProgramAccounts( - try Pubkey.fromString("TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"), - .{ .filters = &filters }, - ); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("program accounts: {any}", .{resp.result()}); -} -``` - -
- -
-getRecentPerformanceSamples - Returns a list of recent performance samples, in reverse slot order -
- -_NOTE:_ Performance samples are taken every 60 seconds and include the number of transactions and slots that occur in a given time window. - -**Params:** (limit: ?u64) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getRecentPerformanceSamples(null); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("recent performance samples: {any}", .{resp.result()}); -} -``` - -
- -
-getRecentPrioritizationFees - Returns a list of prioritization fees from recent blocks -
- -**Params:** (pubkeys: ?[]Pubkey) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getRecentPrioritizationFees(null); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("recent prioritization fees: {any}", .{resp.result()}); -} -``` - -
- -
-getSignaturesForAddress - Returns signatures for confirmed transactions that include the given address in their accountKeys list -
- -_NOTE:_ Returns signatures backwards in time from the provided signature or most recent confirmed block. - -**Params:** (pubkey: Pubkey, options: GetSignaturesForAddressOptions) - -
- -**Options** -
- -```zig -pub const GetSignaturesForAddressOptions = struct { - commitment: ?types.Commitment = null, - minContextSlot: ?u64 = null, - limit: u32 = 1000, - before: ?[]const u8 = null, - until: ?[]const u8 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getSignaturesForAddress(try Pubkey.fromString("4rL4RCWHz3iNCdCaveD8KcHfV9YWGsqSHFPo7X2zBNwa"), .{ .limit = 10 }); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("signatures: {any}", .{resp.result()}); -} -``` - -
- -
-getSignatureStatuses - Returns the statuses of a list of signatures -
- -**Params:** (signatures: [][]const u8, options: GetSignatureStatusesOptions) - -
- -**Options** -
- -```zig -const GetSignatureStatusesOptions = struct { - searchTransactionHistory: bool = false, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var signatures = [2][]const u8{ - "3oK4vMqnRbLhdNVq9Cb81JwHim4QaxvgcNEyA4jTySFFtFtBhJgmLwT3rMFAGakKHE9iMiymVNZsTbnrzNjuxXJc", - "5fqHdfeY1GbshDFzTdybqDbR3mwj5tkgHEP28dFWFZDcvQkkJUynVWrsfMYip8SsfAaFYTFmRdeC3K1CQRC7Ukkb", - }; - var resp = try client.getSignatureStatuses(&signatures, .{ .searchTransactionHistory = true }); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("signature statuses: {any}", .{resp.result()}); -} -``` - -
- -
-getSlotLeader - Returns the current slot leader -
- -**Params:** (options: GetSlotLeaderOptions) - -
- -**Options** -
- -```zig -const GetSlotLeaderOptions = struct { - commitment: ?types.Commitment = null, - minContextSlot: ?u64 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getSlotLeader(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("slot leader: {any}", .{resp.result()}); -} -``` - -
- -
-getSlotLeaders - Returns the slot leaders for a given slot range -
- -**Params:** (startSlot: ?u64, limit: ?u64) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getSlotLeaders(193536000, 10); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("slot leaders: {any}", .{resp.result()}); -} -``` - -
- -
-getStakeActivation - Returns epoch activation information for a stake account -
- -**Params:** (pubkey: Pubkey, options: GetStakeActivationOptions) - -
- -**Options** -
- -```zig -pub const GetStakeActivationOptions = struct { - commitment: ?types.Commitment = null, - minContextSlot: ?u64 = null, - epoch: ?u64 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getStakeActivation(try Pubkey.fromString( - "CWrKSEDYhj6VHGocZowq2BUncKESqD7rdLTSrsoasTjU", - ), .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("stake activation: {any}", .{resp.result()}); -} -``` - -
- -
-getStakeMinimumDelegation - Returns the stake minimum delegation, in lamports -
- -**Params:** (options: GetStakeMinimumDelegationOptions) - -
- -**Options** -
- -```zig -const GetStakeMinimumDelegationOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getStakeMinimumDelegation(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("min stake delegation: {any}", .{resp.result()}); -} -``` - -
- -
-getSupply - Returns information about the current supply -
- -**Params:** (options: GetSupplyOptions) - -
- -**Options** -
- -```zig -const GetSupplyOptions = struct { - commitment: ?types.Commitment = null, - excludeNonCirculatingAccountsList: ?bool = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getSupply(.{ .excludeNonCirculatingAccountsList = false }); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("get supply: {any}", .{resp.result()}); -} -``` - -
- -
-getTokenAccountBalance - Returns the token balance of an SPL Token account -
- -**Params:** (pubkey: Pubkey, options: GetTokenAccountBalanceOptions) - -
- -**Options** -
- -```zig -const GetTokenAccountBalanceOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var pubkey = try Pubkey.fromString( - "6A5NHCj1yF6urc9wZNe6Bcjj4LVszQNj5DwAWG97yzMu", - ); - var resp = try client.getTokenAccountBalance(pubkey, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("token account balance: {any}", .{resp.result()}); -} -``` - -
- -
-getTokenAccountsByDelegate - Returns all SPL Token accounts by approved Delegate -
- -**Params:** (pubkey: Pubkey, mintOrProgramId: MintOrProgramIdParam, options: GetTokenAccountsByDelegateOptions) - -
- -**Options** -
- -```zig -const MintOrProgramIdParam = struct { - mint: ?Pubkey = null, - programId: ?Pubkey = null, -}; - -const GetTokenAccountsByDelegateOptions = struct { - commitment: ?types.Commitment = null, - encoding: types.Encoding = .Base64, - minContextSlot: ?u64 = null, - dataSlice: ?DataSlice = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var programPubkey = try Pubkey.fromString( - "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA", - ); - var pubkey = try Pubkey.fromString( - "CTz5UMLQm2SRWHzQnU62Pi4yJqbNGjgRBHqqp6oDHfF7", - ); - var resp = try client.getTokenAccountsByDelegate(pubkey, .{ .programId = programPubkey }, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("token accounts: {any}", .{resp.result()}); -} -``` - -
- -
-getTokenAccountsByOwner - Returns all SPL Token accounts by token owner -
- -**Params:** (pubkey: Pubkey, mintOrProgramId: MintOrProgramIdParam, options: GetTokenAccountsByOwnerOptions) - -
- -**Options** -
- -```zig -const MintOrProgramIdParam = struct { - mint: ?Pubkey = null, - programId: ?Pubkey = null, -}; - -const GetTokenAccountsByOwnerOptions = struct { - commitment: ?types.Commitment = null, - encoding: types.Encoding = .Base64, - minContextSlot: ?u64 = null, - dataSlice: ?DataSlice = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var mintPubkey = try Pubkey.fromString( - "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v", - ); - var pubkey = try Pubkey.fromString( - "CTz5UMLQm2SRWHzQnU62Pi4yJqbNGjgRBHqqp6oDHfF7", - ); - var resp = try client.getTokenAccountsByOwner(pubkey, .{ .mint = mintPubkey }, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("token accounts: {any}", .{resp.result()}); -} -``` - -
- -
-getTokenLargestAccounts - Returns the 20 largest accounts of a particular SPL Token type -
- -**Params:** (pubkey: Pubkey, options: GetTokenLargestAccountsOptions) - -
- -**Options** -
- -```zig -const GetTokenLargestAccountsOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var mintPubkey = try Pubkey.fromString( - "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v", - ); - var resp = try client.getTokenLargestAccounts(mintPubkey, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("token largest accounts: {any}", .{resp.result()}); -} -``` - -
- -
-getTokenSupply - Returns the total supply of an SPL Token type -
- -**Params:** (pubkey: Pubkey, options: GetTokenSupplyOptions) - -
- -**Options** -
- -```zig -const GetTokenSupplyOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var mintPubkey = try Pubkey.fromString( - "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v", - ); - var resp = try client.getTokenSupply(mintPubkey, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("token supply: {any}", .{resp.result()}); -} -``` - -
- -
-getTransaction - Returns transaction details for a confirmed transaction -
- -**Params:** (signature: []const u8, options: GetTransactionOptions) - -
- -**Options** -
- -```zig -const GetTransactionOptions = struct { - commitment: ?types.Commitment = null, - maxSupportedTransactionVersion: u8 = 0, - /// NOTE: must be Json for now - encoding: types.Encoding = .Json, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var txSig = "5UfDuX7WXY18keiz9mZ6zKkY8JyNuLDFz2QycQcr7skRkgVaNmo6tgFbsePRrX5C6crvycJ2A3txSdGgjPHvPbTZ"; - var resp = try client.getTransaction(txSig, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("transaction: {any}", .{resp.result()}); -} -``` - -
- -
-getTransactionCount - Returns the current Transaction count from the ledger -
- -**Params:** (options: GetTransactionCountOptions) - -
- -**Options** -
- -```zig -const GetTransactionCountOptions = struct { - commitment: ?types.Commitment = null, - minContextSlot: ?u64 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getTransactionCount(.{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("transaction count: {any}", .{resp.result()}); -} -``` - -
- -
-getVersion - Returns the current Solana version running on the node -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.getVersion(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("version: {any}", .{resp.result()}); -} -``` - -
- -
-getVoteAccounts - Returns the account info and associated stake for all the voting accounts in the current bank -
- -**Params:** (options: GetVoteAccountsOptions) - -
- -**Options** -
- -```zig -const GetVoteAccountsOptions = struct { - commitment: ?types.Commitment = null, - votePubkey: ?Pubkey = null, - keepUnstakedDelinquents: ?bool = false, - delinquentSlotDistance: ?u64 = 0, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var votePubkey = try Pubkey.fromString( - "CertusDeBmqN8ZawdkxK5kFGMwBXdudvWHYwtNgNhvLu", - ); - var resp = try client.getVoteAccounts(.{ .votePubkey = votePubkey }); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("vote accounts: {any}", .{resp.result()}); -} -``` - -
- -
-isBlockhashValid - Returns whether a blockhash is still valid or not -
- -**Params:** (blockhash: []const u8, options: IsBlockhashValidOptions) - -
- -**Options** -
- -```zig -pub const IsBlockhashValidOptions = struct { - commitment: ?types.Commitment = null, - minContextSlot: ?u64 = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.isBlockhashValid("AaPs8sYJjnDLMMAADYj2fPyDyNzp9to9v4J6c5gevxpX", .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("blockhash valid: {any}", .{resp.result()}); -} -``` - -
- -
-minimumLedgerSlot - Returns the lowest slot that the node has information about in its ledger -
- -**Params:** None - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.minimumLedgerSlot(); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("minimum ledger slot: {any}", .{resp.result()}); -} -``` - -
- -
-requestAirdrop - Requests an airdrop of lamports to a Pubkey -
- -**Params:** (pubkey: Pubkey, lamports: u64, options: RequestAirdropOptions) - -
- -**Options** -
- -```zig -const RequestAirdropOptions = struct { - commitment: ?types.Commitment = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var pubkey = try Pubkey.fromString( - "Bvg7GuhqwNmV2JVyeZjhAcTPFqPktfmq25VBaZipozda", - ); - var resp = try client.requestAirdrop(pubkey, 10000, .{}); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("airdrop result: {any}", .{resp.result()}); -} -``` - -
- -
-sendTransaction - Submits a signed transaction to the cluster for processing -
- -_NOTE:_ -This method does not alter the transaction in any way; it relays the transaction created by clients to the node as-is. - -If the node's rpc service receives the transaction, this method immediately succeeds, without waiting for any confirmations. A successful response from this method does not guarantee the transaction is processed or confirmed by the cluster. - -While the rpc service will reasonably retry to submit it, the transaction could be rejected if transaction's recent_blockhash expires before it lands. - -Use getSignatureStatuses to ensure a transaction is processed and confirmed. - -Before submitting, the following preflight checks are performed: - -The transaction signatures are verified -The transaction is simulated against the bank slot specified by the preflight commitment. On failure an error will be returned. Preflight checks may be disabled if desired. It is recommended to specify the same commitment and preflight commitment to avoid confusing behavior. -The returned signature is the first signature in the transaction, which is used to identify the transaction (transaction id). This identifier can be easily extracted from the transaction data before submission. - -**Params:** (encoded: []const u8) - -
- -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.sendTransaction( - "4hXTCkRzt9WyecNzV1XPgCDfGAZzQKNxLXgynz5QDuWWPSAZBZSHptvWRL3BjCvzUXRdKvHL2b7yGrRQcWyaqsaBCncVG7BFggS8w9snUts67BSh3EqKpXLUm5UMHfD7ZBe9GhARjbNQMLJ1QD3Spr6oMTBU6EhdB4RD8CP2xUxr2u3d6fos36PD98XS6oX8TQjLpsMwncs5DAMiD4nNnR8NBfyghGCWvCVifVwvA8B8TJxE1aiyiv2L429BCWfyzAme5sZW8rDb14NeCQHhZbtNqfXhcp2tAnaAT", - .{}, - ); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("tx signature: {any}", .{resp.result()}); -} -``` - -
- -
-simulateTransaction - Simulate sending a transaction -
- -**Params:** (encoded: []const u8, options: SimulateTransactionOptions) - -
- -**Options** -
- -```zig -const SimulateTransactionOptions = struct { - commitment: ?types.Commitment = null, - /// NOTE: must be base64 for now - encoding: types.Encoding = .Base64, - sigVerify: ?bool = null, - replaceRecentBlockhash: ?[]const u8 = null, - minContextSlot: ?u64 = null, - accounts: ?struct { - addresses: []Pubkey, - /// NOTE: must be base64 for now - encoding: types.Encoding = .Base64, - } = null, -}; -``` - -**Usage** -
- -```zig -const std = @import("std"); -const sig = @import("sig"); -const rpc = sig.rpc; - - -const allocator = std.heap.page_allocator; - -pub fn main() !void { - var client = try rpc.Client.init(allocator, .{ .http_endpoint = HTTP_ENDPOINT }); - defer client.deinit(); - - var resp = try client.simulateTransaction( - "AdYOLBh+RlElmqIY08YZ4OvkWzmGz5ccQLKOENWccchuSluWO7ZTy6B4x/A/WJAFvSFfUhXEcG/PZajL5EmZBQMBAAEDb3Q4CUF/hTg/MgAsYv45KRoWu+9GafjMndSktv5KzQ3fydC+bF4RL7cMFn8iCnd9sKVJp3K3PwOxVZ3agBBUWAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjkczsB8wv5yFAgAKUdvb4irHybi2IEEHJcAJrfdhMfgBAgIAAQwCAAAAgJaYAAAAAAA=", - .{}, - ); - defer resp.deinit(); - - if (resp.err()) |err| { - std.log.err("error response: {any}", .{err}); - return; - } - - std.log.debugf("simulate tx info: {any}", .{resp.result()}); -} -``` - -
+Sig +- [Introduction](https://blog.syndica.io/introducing-sig-by-syndica-an-rps-focused-solana-validator-client-written-in-zig/) +- [Gossip Deep Dive](https://blog.syndica.io/sig-engineering-1-gossip-protocol/) From 99bf2433a03281c2896a27e75139d7da0a2afec7 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 4 Dec 2023 15:45:53 -0500 Subject: [PATCH 42/72] docs: readme organization --- readme.md | 71 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/readme.md b/readme.md index 8e7c09add..c06737fb7 100644 --- a/readme.md +++ b/readme.md @@ -18,21 +18,19 @@ _Sig_ is a Solana validator client implementation written in Zig. Read the [intr ⚠️ NOTE: This is a WIP, please open any issues for any bugs/improvements. -## Development - -### 📦 Setup +## 📦 Setup Zig 0.11.0 is required to build Sig.
Dev Environment Recommendations -#### Build Dependencies +### Build Dependencies - Zig 0.11.0 - Choose one: - [Binary Releases](https://ziglang.org/download/) (extract and add to PATH) - [Install with a package manager](https://github.com/ziglang/zig/wiki/Install-Zig-from-a-Package-Manager) - Manage multiple versions with [zigup](https://github.com/marler8997/zigup) or [zvm](https://www.zvm.app/) -#### Developer Tools +### Developer Tools These tools are optional but recommended for a smooth development process. - [Zig Language Server (ZLS) 0.11.0](https://github.com/zigtools/zls/wiki/Installation) @@ -40,7 +38,7 @@ These tools are optional but recommended for a smooth development process. - [Zig Language](https://marketplace.visualstudio.com/items?itemName=ziglang.vscode-zig) VS Code extension - [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb) VS Code extension -##### Visual Studio Code +#### Visual Studio Code If you use VS Code, you should install the [Zig Language](https://marketplace.visualstudio.com/items?itemName=ziglang.vscode-zig) extension. It can use your installed versions of Zig and ZLS, or it can download and manage its own internal versions. @@ -109,40 +107,13 @@ To run the debugger, you need a run configuration. This launch.json includes an
-### 🔧 Build - -```bash -zig build -``` - -### 🧪 Test -Run all tests. -```bash -zig build test -``` - -Include `--summary all` with any test command to see a summary of the test results. - -Include a filter to limit which tests are run. Sig tests include their module name. For example, you can run all tests in `gossip.crds_table` like this: -```bash -zig build test --summary all -- gossip.crds_table -``` -### 📊 Benchmark -Run all benchmarks. -```bash -zig build benchmark -``` +## 🔧 Build -Run a benchmark group: socket_utils, gossip, or sync. ```bash -zig build benchmark -- gossip +zig build ``` -### Code -See [CONTRIBUTING.md](docs/CONTRIBUTING.md) for the code style guide. -

- ## 🚀 Run Run Sig with `zig` or execute the binary you already built: @@ -206,6 +177,36 @@ sig gossip --entrypoint 35.203.170.30:8001 \
+## Develop + +See [Setup](#-setup) for information about setting up your development environment. See [CONTRIBUTING.md](docs/CONTRIBUTING.md) for the code style guide. + +### 🧪 Test +Run all tests. +```bash +zig build test +``` + +Include `--summary all` with any test command to see a summary of the test results. + +Include a filter to limit which tests are run. Sig tests include their module name. For example, you can run all tests in `gossip.crds_table` like this: +```bash +zig build test --summary all -- gossip.crds_table +``` + +### 📊 Benchmark +Run all benchmarks. +```bash +zig build benchmark +``` + +Run a benchmark group: socket_utils, gossip, or sync. +```bash +zig build benchmark -- gossip +``` +

+ + ## 📦 Import Sig Sig can be included as a dependency in your Zig project using `build.zig.zon` file (available for Zig >= 0.11). See the [API documentation](docs/api.md) to learn more about how to use Sig as a library. From e89a6a47a06ac8c36d67d7372586ed6e071a1faa Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 4 Dec 2023 16:04:18 -0500 Subject: [PATCH 43/72] docs: readme tweaks --- readme.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/readme.md b/readme.md index c06737fb7..9f3a149eb 100644 --- a/readme.md +++ b/readme.md @@ -124,24 +124,18 @@ zig build run -- --help ./zig-out/bin/sig --help ``` -These commands are abbreviated as `sig` in this document. An alias can be used to implement this abbreviation in your shell. -```bash -# run in repository root -alias sig="$(pwd)/zig-out/bin/sig" -``` +These commands will be abbreviated as `sig` in the rest of this document. ### 👤 Identity -Sig stores its private key in `~/.sig/identity.key`. On its first run, Sig will automatically generate a key if no key exists. - -To see the public key, use the `identity` subcommand: +Sig stores its private key in `~/.sig/identity.key`. On its first run, Sig will automatically generate a key if no key exists. To see the public key, use the `identity` subcommand. ```bash sig identity ``` ### 📞 Gossip -To run Sig as a Solana gossip client, use the `gossip` subcommand. Specify entrypoints to connect to a cluster. Optionally use `-p` to specify a custom listening port (default is 8001). +To run Sig as a Solana gossip client, use the `gossip` subcommand. Specify entrypoints to connect to a cluster. Optionally use `-p` to specify a custom listening port (default is 8001). For more info about gossip, see the [readme](src/gossip/readme.md). ```bash sig gossip -p --entrypoint : ``` @@ -175,7 +169,6 @@ sig gossip --entrypoint 35.203.170.30:8001 \ --entrypoint 139.178.68.207:8001 ``` -
## Develop @@ -307,7 +300,7 @@ Zig's own definition: `Zig is a general-purpose programming language and toolcha ## 🧩 Modules -- **Gossip** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` +- **[Gossip](src/gossip/readme.md)** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` - **Core** - Core data structures shared across modules From a0a2eb3b822ddf52be4bed3ac9bcbea9c98db617 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 4 Dec 2023 16:30:34 -0500 Subject: [PATCH 44/72] docs: readme tweaks --- readme.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/readme.md b/readme.md index 9f3a149eb..1b1918836 100644 --- a/readme.md +++ b/readme.md @@ -12,17 +12,17 @@


-_Sig_ is a Solana validator client implementation written in Zig. Read the [introductory blog post](https://blog.syndica.io/introducing-sig-by-syndica-an-rps-focused-solana-validator-client-written-in-zig/) for more about the goals of this project. +_Sig_ is a Solana validator client implemented in Zig. Read the [introductory blog post](https://blog.syndica.io/introducing-sig-by-syndica-an-rps-focused-solana-validator-client-written-in-zig/) for more about the goals of this project.

⚠️ NOTE: This is a WIP, please open any issues for any bugs/improvements. -## 📦 Setup +## 📋 Setup Zig 0.11.0 is required to build Sig. -
Dev Environment Recommendations +
Dev Environment Details ### Build Dependencies - Zig 0.11.0 - Choose one: @@ -46,7 +46,7 @@ You can use [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadim
tasks.json -```json +```yaml { // See https://go.microsoft.com/fwlink/?LinkId=733558 // for the documentation about the tasks.json format @@ -168,11 +168,11 @@ sig gossip --entrypoint 35.197.53.105:8001 \ sig gossip --entrypoint 35.203.170.30:8001 \ --entrypoint 139.178.68.207:8001 ``` -
+

## Develop -See [Setup](#-setup) for information about setting up your development environment. See [CONTRIBUTING.md](docs/CONTRIBUTING.md) for the code style guide. +See [Setup](#-setup) to get your environment set up. See [CONTRIBUTING.md](docs/CONTRIBUTING.md) for the code style guide. ### 🧪 Test Run all tests. @@ -197,7 +197,7 @@ Run a benchmark group: socket_utils, gossip, or sync. ```bash zig build benchmark -- gossip ``` -

+
## 📦 Import Sig @@ -292,19 +292,18 @@ Zig's own definition: `Zig is a general-purpose programming language and toolcha ### Note -- Zig is still a evolving language. +- Zig is still an evolving language. - Many of the low-level APIs have been stabilized but `std.http.Client` and `std.json` are still WIP targetting stable implementations by `>=0.11`. -- This library was compiled and tested using `0.11.0-dev.3997+546212ff7` (master). - Currently, `std.http.Client` [leaks](https://github.com/ziglang/zig/blob/447a30299073ce88b7b26d18d060a345beac5276/lib/std/http/Client.zig#L913) and is failing some tests, fix is in works.

## 🧩 Modules -- **[Gossip](src/gossip/readme.md)** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` +- **[Gossip](src/gossip)** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` -- **Core** - Core data structures shared across modules +- **[Core](src/core)** - Core data structures shared across modules -- **RPC Client** - A fully featured HTTP RPC client with ability to query all on-chain data along with sending transactions +- **[RPC Client](src/rpc)** - A fully featured HTTP RPC client with ability to query all on-chain data along with sending transactions

## 📚 Learn More From 2497012b9e79d596c336ca885b2599032d9a4225 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 4 Dec 2023 16:55:58 -0500 Subject: [PATCH 45/72] docs(readme): endpoint ip address explanation and source --- readme.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/readme.md b/readme.md index 1b1918836..3852fba8f 100644 --- a/readme.md +++ b/readme.md @@ -140,6 +140,8 @@ To run Sig as a Solana gossip client, use the `gossip` subcommand. Specify entry sig gossip -p --entrypoint : ``` +The following IP addresses were resolved from domains found at https://docs.solana.com/clusters. Sig currently only works with IP addresses, not domain names. +
mainnet ```bash From 64dd26d9c02cc5b8c5ca79e68116482f1a25145b Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 11:36:35 -0500 Subject: [PATCH 46/72] docs(readme): reorganize setup section --- readme.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/readme.md b/readme.md index 3852fba8f..25c4b0fec 100644 --- a/readme.md +++ b/readme.md @@ -20,17 +20,16 @@ _Sig_ is a Solana validator client implemented in Zig. Read the [introductory bl ## 📋 Setup -Zig 0.11.0 is required to build Sig. - -
Dev Environment Details - ### Build Dependencies - Zig 0.11.0 - Choose one: - [Binary Releases](https://ziglang.org/download/) (extract and add to PATH) - [Install with a package manager](https://github.com/ziglang/zig/wiki/Install-Zig-from-a-Package-Manager) - Manage multiple versions with [zigup](https://github.com/marler8997/zigup) or [zvm](https://www.zvm.app/) -### Developer Tools +
+ +### Developer Tools + These tools are optional but recommended for a smooth development process. - [Zig Language Server (ZLS) 0.11.0](https://github.com/zigtools/zls/wiki/Installation) From 528e7f91becc89fc54d842b21c0bf29a6d314642 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 11:43:51 -0500 Subject: [PATCH 47/72] docs(readme): add link to api docs for rpc client in module section --- readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 25c4b0fec..f985153d2 100644 --- a/readme.md +++ b/readme.md @@ -302,9 +302,9 @@ Zig's own definition: `Zig is a general-purpose programming language and toolcha - **[Gossip](src/gossip)** - A gossip spy node, run by: `sig gossip` or `zig build run -- gossip` -- **[Core](src/core)** - Core data structures shared across modules +- **[Core](src/core)** - Core data structures shared across modules. -- **[RPC Client](src/rpc)** - A fully featured HTTP RPC client with ability to query all on-chain data along with sending transactions +- **[RPC Client](src/rpc)** ([docs](docs/api.md#rpcclient---api-reference)) - A fully featured HTTP RPC client with ability to query all on-chain data along with sending transactions.

## 📚 Learn More From 18ea8e742903bba22a76e2755df86b0a3e37f080 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 29 Nov 2023 19:13:32 -0500 Subject: [PATCH 48/72] fix(gossip): verifyPackets tasks index out of bounds with multiple batches. treat array as ring buffer --- src/gossip/gossip_service.zig | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 6d6ba205e..ab4733e84 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -337,6 +337,14 @@ pub const GossipService = struct { }; self.verified_incoming_channel.send(msg) catch unreachable; } + + /// waits for the task to be done, then resets the done state to false + fn awaitAndReset(self: *VerifyMessageTask) void { + while (!self.done.load(std.atomic.Ordering.Acquire)) { + // wait + } + self.done.store(false, std.atomic.Ordering.Release); + } }; /// main logic for deserializing Packets into Protocol messages @@ -380,7 +388,10 @@ pub const GossipService = struct { var count: usize = 0; for (packet_batches) |*packet_batch| { for (packet_batch.items) |*packet| { - var task = tasks[count]; + var task = tasks[count % socket_utils.PACKETS_PER_BATCH]; + if (count > socket_utils.PACKETS_PER_BATCH) { + task.awaitAndReset(); + } task.packet = packet; const batch = Batch.from(&task.task); @@ -390,11 +401,8 @@ pub const GossipService = struct { } } - for (tasks[0..count]) |task| { - while (!task.done.load(std.atomic.Ordering.Acquire)) { - // wait - } - task.done.store(false, std.atomic.Ordering.Release); + for (tasks[0..@min(count, socket_utils.PACKETS_PER_BATCH)]) |task| { + task.awaitAndReset(); } } From 4fddff3238674dfec21d581ce7f053e9f20d0f7e Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 11:17:22 -0500 Subject: [PATCH 49/72] fix(gossip): fix off-by-one error that was causing segfaults and add assert to help expose issues like this --- src/gossip/gossip_service.zig | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index ab4733e84..576d1e825 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -307,6 +307,7 @@ pub const GossipService = struct { pub fn callback(task: *Task) void { var self = @fieldParentPtr(@This(), "task", task); + std.debug.assert(!self.done.load(std.atomic.Ordering.Acquire)); defer self.done.store(true, std.atomic.Ordering.Release); var protocol_message = bincode.readFromSlice( @@ -389,7 +390,7 @@ pub const GossipService = struct { for (packet_batches) |*packet_batch| { for (packet_batch.items) |*packet| { var task = tasks[count % socket_utils.PACKETS_PER_BATCH]; - if (count > socket_utils.PACKETS_PER_BATCH) { + if (count >= socket_utils.PACKETS_PER_BATCH) { task.awaitAndReset(); } task.packet = packet; From f0d8f551500c81744b10d44aef070ea98d5c9710 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 14:28:05 -0500 Subject: [PATCH 50/72] fix(lru): segfault in LruCache.pop The problem was that it would deinit the node before returning the node's data. This almost always worked fine, because the allocator would need to reuse or unmap the memory for the problem to actually show up, and that typically did not happen before the return statement on the next line of code was executed. On rare occasions, however, the address would be unmapped, causing a segfault.
The solution is to defer the deinit so that it happens after the return statement copies the value --- src/common/lru.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index 01bdc867f..33d82f434 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -157,7 +157,7 @@ pub fn LruCache(comptime K: type, comptime V: type) type { if (self.hashmap.fetchSwapRemove(k)) |kv| { var node = kv.value; self.dbl_link_list.remove(node); - self.deinitNode(node); + defer self.deinitNode(node); return node.data.value; } return null; From 0df7988cd2cea7115e40c366990343f83426f1aa Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 20:17:06 -0500 Subject: [PATCH 51/72] fix(bincode): deserialization failure (ShredType) ### Root cause All enums are deserialized with the assumption they have a size of 32 bits in bincode. ShredType however is a struct in rust and enum in zig. It is serialized as 8 bits. This causes deserialization errors reading the data from mainnet because it grabs 32 bits and reads a much larger number than any of the enum variants in zig. ### Solution Add logic to bincode implementation that looks for a declaration called BincodeSize within the enum. If it exists, use that instead of u32. Set it to u8 for ShredType.
--- src/bincode/bincode.zig | 10 +++++++--- src/gossip/crds.zig | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/bincode/bincode.zig b/src/bincode/bincode.zig index 28125c5ff..3ca0e3c46 100644 --- a/src/bincode/bincode.zig +++ b/src/bincode/bincode.zig @@ -118,10 +118,14 @@ pub fn Deserializer(comptime Reader: type) type { } pub fn deserializeEnum(self: *Self, ally: ?std.mem.Allocator, visitor: anytype) Error!@TypeOf(visitor).Value { - const T = u32; // enum size + const T = @TypeOf(visitor).Value; + comptime var SerializedSize = u32; + comptime if (@hasDecl(T, "BincodeSize")) { + SerializedSize = T.BincodeSize; + }; const tag = switch (self.params.endian) { - .Little => self.reader.readIntLittle(T), - .Big => self.reader.readIntBig(T), + .Little => self.reader.readIntLittle(SerializedSize), + .Big => self.reader.readIntBig(SerializedSize), } catch { return Error.IO; }; diff --git a/src/gossip/crds.zig b/src/gossip/crds.zig index a913ae6f6..c9cca83e8 100644 --- a/src/gossip/crds.zig +++ b/src/gossip/crds.zig @@ -766,6 +766,8 @@ pub const NodeInstance = struct { pub const ShredType = enum(u32) { Data = 0b1010_0101, Code = 0b0101_1010, + + pub const BincodeSize = u8; }; pub const DuplicateShred = struct { From fc8de7373ba36a25712749d612e2d979fac01d9d Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 21:50:28 -0500 Subject: [PATCH 52/72] fix(gossip): serialize ShredType to one byte, just like deserialization does now. --- src/gossip/crds.zig | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gossip/crds.zig b/src/gossip/crds.zig index c9cca83e8..923d62802 100644 --- a/src/gossip/crds.zig +++ b/src/gossip/crds.zig @@ -763,11 +763,24 @@ pub const NodeInstance = struct { } }; -pub const ShredType = enum(u32) { +pub const ShredType = enum(u8) { Data = 0b1010_0101, Code = 0b0101_1010, + /// Enables bincode deserializer to deserialize this data from a single byte instead of 4. 
pub const BincodeSize = u8; + + /// Enables bincode serializer to serialize this data into a single byte instead of 4. + pub const @"getty.sb" = struct { + pub fn serialize( + allocator: ?std.mem.Allocator, + value: anytype, + serializer: anytype, + ) @TypeOf(serializer).Error!@TypeOf(serializer).Ok { + _ = allocator; + return try serializer.serializeInt(@intFromEnum(value)); + } + }; }; pub const DuplicateShred = struct { From 987b50c37742c8cf1e868d013df6a4017c0c46a5 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 6 Dec 2023 17:37:26 -0500 Subject: [PATCH 53/72] fix(gossip): #50 Double free in ActiveSet # Root cause Multiple contacts with the same pubkey were being passed to the rotate method. This resulted in duplicate items being included in the array, but the hashmap internally prevents there from being duplicates. So the next time rotate is called, it would iterate through the array and attempt to deinit the same hashmap entry twice. # Solution Duplicates are detected and skipped during the insertion loop by checking for their presence in the hashmap. # Alternate Solution This could have also been addressed with a simpler approach: Remove the peers field from ActiveSet. This field does not seem necessary, since the same data is already in the hashmap. Even though it would be simpler code, it is a more significant change in behavior and compatibility. To be safe, I decided not to take this approach. 
--- src/gossip/active_set.zig | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index 9f760d0e0..2e0956d06 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -70,8 +70,12 @@ pub const ActiveSet = struct { pull_request.shuffleFirstN(rng.random(), crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); - for (0..size) |i| { - self.peers[i] = crds_peers[i].id; + var tgt: u8 = 0; + for (0..size) |src| { + if (self.pruned_peers.contains(crds_peers[src].id)) { + continue; + } + self.peers[tgt] = crds_peers[src].id; // *full* hard restart on blooms -- labs doesnt do this - bug? var bloom = try Bloom.random( @@ -80,9 +84,10 @@ pub const ActiveSet = struct { BLOOM_FALSE_RATE, BLOOM_MAX_BITS, ); - try self.pruned_peers.put(self.peers[i], bloom); + try self.pruned_peers.put(self.peers[tgt], bloom); + tgt += 1; } - self.len = size; + self.len = tgt; } pub fn prune(self: *Self, from: Pubkey, origin: Pubkey) void { @@ -177,3 +182,24 @@ test "gossip.active_set: init/deinit" { defer fanout_with_prune.deinit(); try std.testing.expectEqual(no_prune_fanout_len, fanout_with_prune.items.len + 1); } + +// This used to cause a double free when rotating after duplicate ids were inserted +// because there were two entries in the array but only one entry in the hashmap. +// Now the logic prevents duplicates, and this test prevents regressions. 
+test "gossip.active_set: gracefully rotates with duplicate contact ids" { + var alloc = std.testing.allocator; + + var rng = std.rand.DefaultPrng.init(100); + var gossip_peers = try std.ArrayList(crds.LegacyContactInfo).initCapacity(alloc, 10); + defer gossip_peers.deinit(); + + var data = crds.LegacyContactInfo.random(rng.random()); + var dupe = crds.LegacyContactInfo.random(rng.random()); + dupe.id = data.id; + try gossip_peers.append(data); + try gossip_peers.append(dupe); + + var active_set = ActiveSet.init(alloc); + defer active_set.deinit(); + try active_set.rotate(gossip_peers.items); +} From 164a88490488370368bb1a44959fea49599ad703 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Thu, 7 Dec 2023 11:18:36 -0500 Subject: [PATCH 54/72] remove peers and len fields from ActiveSet this is the alternate solution for the double free bug fix. --- src/gossip/active_set.zig | 35 ++++++++++++++++------------------- src/gossip/gossip_service.zig | 9 +++++---- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index 2e0956d06..2f6f83186 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -28,9 +28,7 @@ const BLOOM_MAX_BITS: usize = 1024 * 8 * 4; pub const ActiveSet = struct { // store pubkeys as keys in crds table bc the data can change - peers: [NUM_ACTIVE_SET_ENTRIES]Pubkey, pruned_peers: std.AutoHashMap(Pubkey, Bloom), - len: u8 = 0, allocator: std.mem.Allocator, const Self = @This(); @@ -38,16 +36,19 @@ pub const ActiveSet = struct { pub fn init( allocator: std.mem.Allocator, ) Self { - return Self{ .peers = undefined, .pruned_peers = std.AutoHashMap(Pubkey, Bloom).init(allocator), .len = 0, .allocator = allocator }; + return Self{ .pruned_peers = std.AutoHashMap(Pubkey, Bloom).init(allocator), .allocator = allocator }; } pub fn deinit(self: *Self) void { - for (self.peers[0..self.len]) |peer| { - var entry = self.pruned_peers.getEntry(peer).?; + var iter = 
self.pruned_peers.iterator(); + while (iter.next()) |entry| { entry.value_ptr.deinit(); } self.pruned_peers.deinit(); - self.len = 0; + } + + pub inline fn len(self: *const Self) u32 { + return self.pruned_peers.unmanaged.size; } pub fn rotate( @@ -55,11 +56,10 @@ pub const ActiveSet = struct { crds_peers: []crds.LegacyContactInfo, ) error{OutOfMemory}!void { // clear the existing - for (self.peers[0..self.len]) |peer| { - var entry = self.pruned_peers.getEntry(peer).?; + var iter = self.pruned_peers.iterator(); + while (iter.next()) |entry| { entry.value_ptr.deinit(); } - self.len = 0; self.pruned_peers.clearRetainingCapacity(); if (crds_peers.len == 0) { @@ -70,12 +70,10 @@ pub const ActiveSet = struct { pull_request.shuffleFirstN(rng.random(), crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); - var tgt: u8 = 0; for (0..size) |src| { if (self.pruned_peers.contains(crds_peers[src].id)) { continue; } - self.peers[tgt] = crds_peers[src].id; // *full* hard restart on blooms -- labs doesnt do this - bug? 
var bloom = try Bloom.random( @@ -84,10 +82,8 @@ pub const ActiveSet = struct { BLOOM_FALSE_RATE, BLOOM_MAX_BITS, ); - try self.pruned_peers.put(self.peers[tgt], bloom); - tgt += 1; + try self.pruned_peers.put(crds_peers[src].id, bloom); } - self.len = tgt; } pub fn prune(self: *Self, from: Pubkey, origin: Pubkey) void { @@ -111,17 +107,17 @@ pub const ActiveSet = struct { errdefer active_set_endpoints.deinit(); // change to while loop - for (self.peers[0..self.len]) |peer_pubkey| { + var iter = self.pruned_peers.iterator(); + while (iter.next()) |entry| { // lookup peer contact info const peer_info = crds_table.get(crds.CrdsValueLabel{ - .LegacyContactInfo = peer_pubkey, + .LegacyContactInfo = entry.key_ptr.*, }) orelse continue; // peer pubkey could have been removed from the crds table const peer_gossip_addr = peer_info.value.data.LegacyContactInfo.gossip; crds.sanitizeSocket(&peer_gossip_addr) catch continue; // check if peer has been pruned - const entry = self.pruned_peers.getEntry(peer_pubkey) orelse unreachable; const origin_bytes = origin.data; if (entry.value_ptr.contains(&origin_bytes)) { continue; @@ -166,7 +162,7 @@ test "gossip.active_set: init/deinit" { defer active_set.deinit(); try active_set.rotate(gossip_peers.items); - try std.testing.expect(active_set.len == CRDS_GOSSIP_PUSH_FANOUT); + try std.testing.expect(active_set.len() == CRDS_GOSSIP_PUSH_FANOUT); const origin = Pubkey.random(rng.random(), .{}); @@ -175,7 +171,8 @@ test "gossip.active_set: init/deinit" { const no_prune_fanout_len = fanout.items.len; try std.testing.expect(no_prune_fanout_len > 0); - const peer_pubkey = active_set.peers[0]; + var iter = active_set.pruned_peers.keyIterator(); + const peer_pubkey = iter.next().?.*; active_set.prune(peer_pubkey, origin); var fanout_with_prune = try active_set.getFanoutPeers(alloc, origin, &crds_table); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 576d1e825..493bd271f 100644 --- 
a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -796,7 +796,7 @@ pub const GossipService = struct { var active_set: *const ActiveSet = active_set_lock.get(); defer active_set_lock.unlock(); - if (active_set.len == 0) return null; + if (active_set.len() == 0) return null; for (crds_entries) |entry| { const value = entry.value; @@ -1861,8 +1861,9 @@ test "gossip.gossip_service: tests handle_prune_messages" { var as_lock = gossip_service.active_set_rw.read(); var as: *const ActiveSet = as_lock.get(); - try std.testing.expect(as.len > 0); // FIX - var peer0 = as.peers[0]; + try std.testing.expect(as.len() > 0); // FIX + var iter = as.pruned_peers.keyIterator(); + const peer0 = iter.next().?.*; as_lock.unlock(); var prunes = [_]Pubkey{Pubkey.random(rng.random(), .{})}; @@ -2221,7 +2222,7 @@ test "gossip.gossip_service: test build_push_messages" { var as: *ActiveSet = as_lock.mut(); try as.rotate(peers.items); as_lock.unlock(); - try std.testing.expect(as.len > 0); + try std.testing.expect(as.len() > 0); } { From 86412c8bede6e8c962f285cdb99b31008311cbdb Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Thu, 7 Dec 2023 12:11:18 -0500 Subject: [PATCH 55/72] fix(gossip): address review in double free pr #55 - remove comment - use HashMap.count for ActiveSet.len - use getOrPut to avoid repeated lookups in ActiveSet.rotate --- src/gossip/active_set.zig | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index 2f6f83186..2b634d4f7 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -47,8 +47,8 @@ pub const ActiveSet = struct { self.pruned_peers.deinit(); } - pub inline fn len(self: *const Self) u32 { - return self.pruned_peers.unmanaged.size; + pub fn len(self: *const Self) u32 { + return self.pruned_peers.count(); } pub fn rotate( @@ -70,19 +70,18 @@ pub const ActiveSet = struct { pull_request.shuffleFirstN(rng.random(), 
crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); - for (0..size) |src| { - if (self.pruned_peers.contains(crds_peers[src].id)) { - continue; + for (0..size) |i| { + var entry = try self.pruned_peers.getOrPut(crds_peers[i].id); + if (entry.found_existing == false) { + // *full* hard restart on blooms -- labs doesnt do this - bug? + var bloom = try Bloom.random( + self.allocator, + bloom_num_items, + BLOOM_FALSE_RATE, + BLOOM_MAX_BITS, + ); + entry.value_ptr.* = bloom; } - - // *full* hard restart on blooms -- labs doesnt do this - bug? - var bloom = try Bloom.random( - self.allocator, - bloom_num_items, - BLOOM_FALSE_RATE, - BLOOM_MAX_BITS, - ); - try self.pruned_peers.put(crds_peers[src].id, bloom); } } @@ -180,9 +179,6 @@ test "gossip.active_set: init/deinit" { try std.testing.expectEqual(no_prune_fanout_len, fanout_with_prune.items.len + 1); } -// This used to cause a double free when rotating after duplicate ids were inserted -// because there were two entries in the array but only one entry in the hashmap. -// Now the logic prevents duplicates, and this test prevents regressions. 
test "gossip.active_set: gracefully rotates with duplicate contact ids" { var alloc = std.testing.allocator; From dfd85a0cec19a3103fc653b3a3cb7de17f2182ef Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 8 Dec 2023 17:01:23 -0500 Subject: [PATCH 56/72] improve task allocation/using threadpool code --- src/gossip/crds_table.zig | 24 +++++++-------------- src/gossip/gossip_service.zig | 40 +++++++++++++++-------------------- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index fd71a0100..62f734748 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -635,38 +635,30 @@ pub const CrdsTable = struct { const cutoff_timestamp = now -| timeout; const n_pubkeys = self.pubkey_to_values.count(); - var tasks = try std.ArrayList(*GetOldLabelsTask).initCapacity(self.allocator, n_pubkeys); + var tasks = try self.allocator.alloc(GetOldLabelsTask, n_pubkeys); defer { - for (tasks.items) |task| { - task.deinit(); - self.allocator.destroy(task); - } - tasks.deinit(); + for (tasks) |*task| task.deinit(); + self.allocator.free(tasks); } // run this loop in parallel - for (self.pubkey_to_values.keys()[0..n_pubkeys]) |key| { + for (self.pubkey_to_values.keys()[0..n_pubkeys], 0..n_pubkeys) |key, i| { var old_labels = std.ArrayList(CrdsValueLabel).init(self.allocator); - var task = GetOldLabelsTask{ + tasks[i] = GetOldLabelsTask{ .key = key, .crds_table = self, .cutoff_timestamp = cutoff_timestamp, .old_labels = old_labels, }; - // alloc on heap - var task_heap = try self.allocator.create(GetOldLabelsTask); - task_heap.* = task; - tasks.appendAssumeCapacity(task_heap); - // run it - const batch = Batch.from(&task_heap.task); + const batch = Batch.from(&tasks[i].task); ThreadPool.schedule(self.thread_pool, batch); } // wait for them to be done to release the lock var output_length: u64 = 0; - for (tasks.items) |task| { + for (tasks) |*task| { while 
(!task.done.load(std.atomic.Ordering.Acquire)) { // wait } @@ -675,7 +667,7 @@ pub const CrdsTable = struct { // move labels to one big array var output = try std.ArrayList(CrdsValueLabel).initCapacity(self.allocator, output_length); - for (tasks.items) |task| { + for (tasks) |*task| { output.appendSliceAssumeCapacity(task.old_labels.items); } diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 493bd271f..34bcff703 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -352,23 +352,18 @@ pub const GossipService = struct { /// and verifing they have valid values, and have valid signatures. /// Verified Protocol messages are then sent to the verified_channel. fn verifyPackets(self: *Self) !void { - var tasks: [socket_utils.PACKETS_PER_BATCH]*VerifyMessageTask = undefined; + var tasks = try self.allocator.alloc(VerifyMessageTask, socket_utils.PACKETS_PER_BATCH); + defer self.allocator.free(tasks); + // pre-allocate all the tasks - for (0..tasks.len) |i| { - var verify_task_heap = try self.allocator.create(VerifyMessageTask); - verify_task_heap.* = VerifyMessageTask{ + for (tasks) |*task| { + task.* = VerifyMessageTask{ .task = .{ .callback = VerifyMessageTask.callback }, .allocator = self.allocator, .verified_incoming_channel = self.verified_incoming_channel, .packet = &Packet.default(), .logger = self.logger, }; - tasks[i] = verify_task_heap; - } - defer { - for (tasks) |task| { - self.allocator.destroy(task); - } } while (!self.exit.load(std.atomic.Ordering.Unordered)) { @@ -389,7 +384,7 @@ pub const GossipService = struct { var count: usize = 0; for (packet_batches) |*packet_batch| { for (packet_batch.items) |*packet| { - var task = tasks[count % socket_utils.PACKETS_PER_BATCH]; + var task = &tasks[count % socket_utils.PACKETS_PER_BATCH]; if (count >= socket_utils.PACKETS_PER_BATCH) { task.awaitAndReset(); } @@ -402,7 +397,7 @@ pub const GossipService = struct { } } - for (tasks[0..@min(count, 
socket_utils.PACKETS_PER_BATCH)]) |task| { + for (tasks[0..@min(count, socket_utils.PACKETS_PER_BATCH)]) |*task| { task.awaitAndReset(); } } @@ -1107,12 +1102,11 @@ pub const GossipService = struct { // create the pull requests const n_valid_requests = valid_indexs.items.len; - var tasks = try ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); + + var tasks = try self.allocator.alloc(PullRequestTask, n_valid_requests); defer { - for (tasks.items) |task| { - self.allocator.destroy(task); - } - tasks.deinit(); + for (tasks) |*task| task.deinit(); + self.allocator.free(tasks); } { @@ -1122,10 +1116,10 @@ pub const GossipService = struct { var output_limit = std.atomic.Atomic(i64).init(MAX_NUM_CRDS_VALUES_PULL_RESPONSE); + var task_index: usize = 0; for (valid_indexs.items) |i| { // create the thread task - var task_heap = try self.allocator.create(PullRequestTask); - task_heap.* = PullRequestTask{ + tasks[task_index] = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, .my_pubkey = &self.my_pubkey, .from_endpoint = &pull_requests.items[i].from_endpoint, @@ -1136,22 +1130,22 @@ pub const GossipService = struct { .allocator = self.allocator, .output_limit = &output_limit, }; - tasks.appendAssumeCapacity(task_heap); + task_index += 1; // run it - const batch = Batch.from(&task_heap.task); + const batch = Batch.from(&tasks[task_index].task); ThreadPool.schedule(self.thread_pool, batch); } // wait for them to be done to release the lock - for (tasks.items) |task| { + for (tasks) |*task| { while (!task.done.load(std.atomic.Ordering.Acquire)) { // wait } } } - for (tasks.items) |task| { + for (tasks) |*task| { if (task.output.items.len > 0) { // TODO: should only need one mux lock in this loop try self.packet_outgoing_channel.send(task.output); From bdf7ebacb8ea4412dc05a489105d4d0c28398dd6 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 8 Dec 2023 17:15:58 -0500 Subject: [PATCH 57/72] small cleanups on the prev 
commit --- src/gossip/crds_table.zig | 2 +- src/gossip/gossip_service.zig | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 62f734748..8638af861 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -642,7 +642,7 @@ pub const CrdsTable = struct { } // run this loop in parallel - for (self.pubkey_to_values.keys()[0..n_pubkeys], 0..n_pubkeys) |key, i| { + for (self.pubkey_to_values.keys()[0..n_pubkeys], 0..) |key, i| { var old_labels = std.ArrayList(CrdsValueLabel).init(self.allocator); tasks[i] = GetOldLabelsTask{ .key = key, diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 34bcff703..d2b8a9722 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -1116,8 +1116,7 @@ pub const GossipService = struct { var output_limit = std.atomic.Atomic(i64).init(MAX_NUM_CRDS_VALUES_PULL_RESPONSE); - var task_index: usize = 0; - for (valid_indexs.items) |i| { + for (valid_indexs.items, 0..) |i, task_index| { // create the thread task tasks[task_index] = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, @@ -1130,7 +1129,6 @@ pub const GossipService = struct { .allocator = self.allocator, .output_limit = &output_limit, }; - task_index += 1; // run it const batch = Batch.from(&tasks[task_index].task); From 10873b91d23b71d9c14e0560d5b53958f1780b2e Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 11 Dec 2023 17:26:34 -0500 Subject: [PATCH 58/72] readme: remove sentence about not supporting domain names --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index f985153d2..5ab4959ae 100644 --- a/readme.md +++ b/readme.md @@ -139,7 +139,7 @@ To run Sig as a Solana gossip client, use the `gossip` subcommand. Specify entry sig gossip -p --entrypoint : ``` -The following IP addresses were resolved from domains found at https://docs.solana.com/clusters. 
Sig currently only works with IP addresses, not domain names. +The following IP addresses were resolved from domains found at https://docs.solana.com/clusters.
mainnet From 78ca244a58dcda35a3f2a61ca301b111dadb70d8 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 13 Dec 2023 12:56:02 -0500 Subject: [PATCH 59/72] incorp comments --- src/gossip/fuzz.zig | 10 ++++---- src/gossip/gossip_service.zig | 45 ++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 9b134a1a3..d44021891 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -8,7 +8,7 @@ const socket_utils = @import("./socket_utils.zig"); const _gossip_service = @import("./gossip_service.zig"); const GossipService = _gossip_service.GossipService; const ChunkType = _gossip_service.ChunkType; -const crds_values_to_packets = _gossip_service.crdsValuesToPackets; +const crdsValuesToPackets = _gossip_service.crdsValuesToPackets; const MAX_PUSH_MESSAGE_PAYLOAD_SIZE = _gossip_service.MAX_PUSH_MESSAGE_PAYLOAD_SIZE; const Logger = @import("../trace/log.zig").Logger; @@ -119,14 +119,14 @@ pub fn randomPushMessage(rng: std.rand.Random, keypair: *const KeyPair, to_addr: } const allocator = std.heap.page_allocator; - const packets = try crds_values_to_packets( + const packets = try crdsValuesToPackets( allocator, &Pubkey.fromPublicKey(&keypair.public_key, false), &crds_values, &to_addr, ChunkType.PushMessage, ); - return packets.?; + return packets; } pub fn randomPullResponse(rng: std.rand.Random, keypair: *const KeyPair, to_addr: EndPoint) !std.ArrayList(Packet) { @@ -139,14 +139,14 @@ pub fn randomPullResponse(rng: std.rand.Random, keypair: *const KeyPair, to_addr } const allocator = std.heap.page_allocator; - const packets = try crds_values_to_packets( + const packets = try crdsValuesToPackets( allocator, &Pubkey.fromPublicKey(&keypair.public_key, false), &crds_values, &to_addr, ChunkType.PullResponse, ); - return packets.?; + return packets; } pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, keypair: *const KeyPair, to_addr: 
EndPoint) !Packet { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index d2b8a9722..a34efbc04 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -148,7 +148,7 @@ pub const GossipService = struct { } var thread_pool = try allocator.create(ThreadPool); - var n_threads = @min(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 8); + var n_threads = @min(@as(u32, @truncate(std.Thread.getCpuCount() catch 1)), 8); thread_pool.* = ThreadPool.init(.{ .max_threads = n_threads, .stack_size = 2 * 1024 * 1024, @@ -736,8 +736,8 @@ pub const GossipService = struct { defer valid_gossip_indexs.deinit(); var valid_gossip_peers: [NUM_ACTIVE_SET_ENTRIES]LegacyContactInfo = undefined; - for (valid_gossip_indexs.items) |i| { - valid_gossip_peers[i] = gossip_peers[i]; + for (0.., valid_gossip_indexs.items) |i, valid_gossip_index| { + valid_gossip_peers[i] = gossip_peers[valid_gossip_index]; } // send pings to peers @@ -754,7 +754,7 @@ pub const GossipService = struct { /// logic for building new push messages which are sent to peers from the /// active set and serialized into packets. - fn buildPushMessages(self: *Self, push_cursor: *u64) !?ArrayList(ArrayList(Packet)) { + fn buildPushMessages(self: *Self, push_cursor: *u64) !ArrayList(ArrayList(Packet)) { // TODO: find a better static value? 
var buf: [512]crds.CrdsVersionedValue = undefined; @@ -766,8 +766,11 @@ pub const GossipService = struct { break :blk crds_table.getEntriesWithCursor(&buf, push_cursor); }; + var packet_batch = ArrayList(ArrayList(Packet)).init(self.allocator); + errdefer packet_batch.deinit(); + if (crds_entries.len == 0) { - return null; + return packet_batch; } const now = getWallclockMs(); @@ -791,7 +794,7 @@ pub const GossipService = struct { var active_set: *const ActiveSet = active_set_lock.get(); defer active_set_lock.unlock(); - if (active_set.len() == 0) return null; + if (active_set.len() == 0) return packet_batch; for (crds_entries) |entry| { const value = entry.value; @@ -841,24 +844,21 @@ pub const GossipService = struct { const num_values_not_considered = crds_entries.len - num_values_considered; push_cursor.* -= num_values_not_considered; - var packet_batch = ArrayList(ArrayList(Packet)).init(self.allocator); - errdefer packet_batch.deinit(); - var push_iter = push_messages.iterator(); while (push_iter.next()) |push_entry| { const crds_values: *const ArrayList(CrdsValue) = push_entry.value_ptr; const to_endpoint: *const EndPoint = push_entry.key_ptr; // send the values as a pull response - var maybe_endpoint_packets = try crdsValuesToPackets( + var packets = try crdsValuesToPackets( self.allocator, &self.my_pubkey, crds_values.items, to_endpoint, ChunkType.PushMessage, ); - if (maybe_endpoint_packets) |endpoint_packets| { - try packet_batch.append(endpoint_packets); + if (packets.items.len > 0) { + try packet_batch.append(packets); } } return packet_batch; @@ -1041,19 +1041,19 @@ pub const GossipService = struct { std.atomic.Ordering.Release, ); - const maybe_packets = crdsValuesToPackets( + const packets = crdsValuesToPackets( self.allocator, self.my_pubkey, response_crds_values.items, self.from_endpoint, ChunkType.PullResponse, - ) catch { - return; - }; + ) catch return; - if (maybe_packets) |*packets| { + if (packets.items.len > 0) { defer packets.deinit(); - 
self.output.appendSlice(packets.items) catch unreachable; + self.output.appendSlice(packets.items) catch { + std.debug.panic("thread task: failed to append packets", .{}); + }; } } }; @@ -1686,8 +1686,9 @@ pub fn crdsValuesToPackets( crds_values: []CrdsValue, to_endpoint: *const EndPoint, chunk_type: ChunkType, -) error{ OutOfMemory, SerializationError }!?ArrayList(Packet) { - if (crds_values.len == 0) return null; +) error{ OutOfMemory, SerializationError }!ArrayList(Packet) { + if (crds_values.len == 0) + return ArrayList(Packet).init(allocator); const indexs = try chunkValuesIntoPacketIndexs( allocator, @@ -2230,7 +2231,7 @@ test "gossip.gossip_service: test build_push_messages" { clg.unlock(); var cursor: u64 = 0; - var msgs = (try gossip_service.buildPushMessages(&cursor)).?; + var msgs = try gossip_service.buildPushMessages(&cursor); try std.testing.expectEqual(cursor, 11); try std.testing.expect(msgs.items.len > 0); for (msgs.items) |*msg| msg.deinit(); @@ -2238,7 +2239,7 @@ test "gossip.gossip_service: test build_push_messages" { var msgs2 = try gossip_service.buildPushMessages(&cursor); try std.testing.expectEqual(cursor, 11); - try std.testing.expect(msgs2 == null); + try std.testing.expect(msgs2.items.len == 0); } test "gossip.gossip_service: test packet verification" { From 7803c875c080b8cff92ccbace53e03ea494b2f04 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 29 Nov 2023 19:13:32 -0500 Subject: [PATCH 60/72] fix(gossip): verifyPackets tasks index out of bounds with multiple batches. 
treat array as ring buffer --- src/gossip/gossip_service.zig | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 6d6ba205e..ab4733e84 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -337,6 +337,14 @@ pub const GossipService = struct { }; self.verified_incoming_channel.send(msg) catch unreachable; } + + /// waits for the task to be done, then resets the done state to false + fn awaitAndReset(self: *VerifyMessageTask) void { + while (!self.done.load(std.atomic.Ordering.Acquire)) { + // wait + } + self.done.store(false, std.atomic.Ordering.Release); + } }; /// main logic for deserializing Packets into Protocol messages @@ -380,7 +388,10 @@ pub const GossipService = struct { var count: usize = 0; for (packet_batches) |*packet_batch| { for (packet_batch.items) |*packet| { - var task = tasks[count]; + var task = tasks[count % socket_utils.PACKETS_PER_BATCH]; + if (count > socket_utils.PACKETS_PER_BATCH) { + task.awaitAndReset(); + } task.packet = packet; const batch = Batch.from(&task.task); @@ -390,11 +401,8 @@ pub const GossipService = struct { } } - for (tasks[0..count]) |task| { - while (!task.done.load(std.atomic.Ordering.Acquire)) { - // wait - } - task.done.store(false, std.atomic.Ordering.Release); + for (tasks[0..@min(count, socket_utils.PACKETS_PER_BATCH)]) |task| { + task.awaitAndReset(); } } From c1bd9b01551f74c5124295a120864ea8ea1d07e0 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 11:17:22 -0500 Subject: [PATCH 61/72] fix(gossip): fix off-by-one error that was causing segfaults and add assert to help expose issues like this --- src/gossip/gossip_service.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index ab4733e84..576d1e825 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -307,6 
+307,7 @@ pub const GossipService = struct { pub fn callback(task: *Task) void { var self = @fieldParentPtr(@This(), "task", task); + std.debug.assert(!self.done.load(std.atomic.Ordering.Acquire)); defer self.done.store(true, std.atomic.Ordering.Release); var protocol_message = bincode.readFromSlice( @@ -389,7 +390,7 @@ pub const GossipService = struct { for (packet_batches) |*packet_batch| { for (packet_batch.items) |*packet| { var task = tasks[count % socket_utils.PACKETS_PER_BATCH]; - if (count > socket_utils.PACKETS_PER_BATCH) { + if (count >= socket_utils.PACKETS_PER_BATCH) { task.awaitAndReset(); } task.packet = packet; From faf1d43540aae0470e732b981049dc394dd6aa62 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 14:28:05 -0500 Subject: [PATCH 62/72] fix(lru): segfault in LruCache.pop the problem was that it would deinit the node before returning node data. this almost always worked fine because you would need the allocator to reuse or unmap the memory to actually see a problem, and this was typically not happening before the return statement on the next line of code was executed. but on rare occasion the address would be unmapped, causing a segfault. 
the solution is to defer the deinit so it happens after the return statement copies the value --- src/common/lru.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index 01bdc867f..33d82f434 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -157,7 +157,7 @@ pub fn LruCache(comptime K: type, comptime V: type) type { if (self.hashmap.fetchSwapRemove(k)) |kv| { var node = kv.value; self.dbl_link_list.remove(node); - self.deinitNode(node); + defer self.deinitNode(node); return node.data.value; } return null; From 537e9d08ed1b681b8c47523b540017276baa092c Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 20:17:06 -0500 Subject: [PATCH 63/72] fix(bincode): deserialization failure (ShredType) All enums are deserialized with the assumption they have a size of 32 bits in bincode. ShredType however is a struct in rust and enum in zig. It is serialized as 8 bits. This causes deserialization errors reading the data from mainnet because it grabs 32 bits and reads a much larger number than any of the enum variants in zig. Add logic to bincode implementation that looks for a declaration called BincodeSize within the enum. If it exists, use that instead of u32. Set it to u8 for ShredType. 
--- src/bincode/bincode.zig | 10 +++++++--- src/gossip/crds.zig | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/bincode/bincode.zig b/src/bincode/bincode.zig index 28125c5ff..3ca0e3c46 100644 --- a/src/bincode/bincode.zig +++ b/src/bincode/bincode.zig @@ -118,10 +118,14 @@ pub fn Deserializer(comptime Reader: type) type { } pub fn deserializeEnum(self: *Self, ally: ?std.mem.Allocator, visitor: anytype) Error!@TypeOf(visitor).Value { - const T = u32; // enum size + const T = @TypeOf(visitor).Value; + comptime var SerializedSize = u32; + comptime if (@hasDecl(T, "BincodeSize")) { + SerializedSize = T.BincodeSize; + }; const tag = switch (self.params.endian) { - .Little => self.reader.readIntLittle(T), - .Big => self.reader.readIntBig(T), + .Little => self.reader.readIntLittle(SerializedSize), + .Big => self.reader.readIntBig(SerializedSize), } catch { return Error.IO; }; diff --git a/src/gossip/crds.zig b/src/gossip/crds.zig index a913ae6f6..c9cca83e8 100644 --- a/src/gossip/crds.zig +++ b/src/gossip/crds.zig @@ -766,6 +766,8 @@ pub const NodeInstance = struct { pub const ShredType = enum(u32) { Data = 0b1010_0101, Code = 0b0101_1010, + + pub const BincodeSize = u8; }; pub const DuplicateShred = struct { From 09e336fd7fc46830614392ba56b4ad0446371ce4 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 5 Dec 2023 21:50:28 -0500 Subject: [PATCH 64/72] fix(gossip): serialize ShredType to one byte, just like deserialization does now. --- src/gossip/crds.zig | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gossip/crds.zig b/src/gossip/crds.zig index c9cca83e8..923d62802 100644 --- a/src/gossip/crds.zig +++ b/src/gossip/crds.zig @@ -763,11 +763,24 @@ pub const NodeInstance = struct { } }; -pub const ShredType = enum(u32) { +pub const ShredType = enum(u8) { Data = 0b1010_0101, Code = 0b0101_1010, + /// Enables bincode deserializer to deserialize this data from a single byte instead of 4. 
pub const BincodeSize = u8; + + /// Enables bincode serializer to serialize this data into a single byte instead of 4. + pub const @"getty.sb" = struct { + pub fn serialize( + allocator: ?std.mem.Allocator, + value: anytype, + serializer: anytype, + ) @TypeOf(serializer).Error!@TypeOf(serializer).Ok { + _ = allocator; + return try serializer.serializeInt(@intFromEnum(value)); + } + }; }; pub const DuplicateShred = struct { From d0051d2c557404d893d1bad6baae0cae729d5459 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 6 Dec 2023 17:37:26 -0500 Subject: [PATCH 65/72] fix(gossip): #50 Double free in ActiveSet # Root cause Multiple contacts with the same pubkey were being passed to the rotate method. This resulted in duplicate items being included in the array, but the hashmap internally prevents there from being duplicates. So the next time rotate is called, it would iterate through the array and attempt to deinit the same hashmap entry twice. # Solution Duplicates are detected and skipped during the insertion loop by checking for their presence in the hashmap. # Alternate Solution This could have also been addressed with a simpler approach: Remove the peers field from ActiveSet. This field does not seem necessary, since the same data is already in the hashmap. Even though it would be simpler code, it is a more significant change in behavior and compatibility. To be safe, I decided not to take this approach. 
--- src/gossip/active_set.zig | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index 9f760d0e0..2e0956d06 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -70,8 +70,12 @@ pub const ActiveSet = struct { pull_request.shuffleFirstN(rng.random(), crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); - for (0..size) |i| { - self.peers[i] = crds_peers[i].id; + var tgt: u8 = 0; + for (0..size) |src| { + if (self.pruned_peers.contains(crds_peers[src].id)) { + continue; + } + self.peers[tgt] = crds_peers[src].id; // *full* hard restart on blooms -- labs doesnt do this - bug? var bloom = try Bloom.random( @@ -80,9 +84,10 @@ pub const ActiveSet = struct { BLOOM_FALSE_RATE, BLOOM_MAX_BITS, ); - try self.pruned_peers.put(self.peers[i], bloom); + try self.pruned_peers.put(self.peers[tgt], bloom); + tgt += 1; } - self.len = size; + self.len = tgt; } pub fn prune(self: *Self, from: Pubkey, origin: Pubkey) void { @@ -177,3 +182,24 @@ test "gossip.active_set: init/deinit" { defer fanout_with_prune.deinit(); try std.testing.expectEqual(no_prune_fanout_len, fanout_with_prune.items.len + 1); } + +// This used to cause a double free when rotating after duplicate ids were inserted +// because there were two entries in the array but only one entry in the hashmap. +// Now the logic prevents duplicates, and this test prevents regressions. 
+test "gossip.active_set: gracefully rotates with duplicate contact ids" { + var alloc = std.testing.allocator; + + var rng = std.rand.DefaultPrng.init(100); + var gossip_peers = try std.ArrayList(crds.LegacyContactInfo).initCapacity(alloc, 10); + defer gossip_peers.deinit(); + + var data = crds.LegacyContactInfo.random(rng.random()); + var dupe = crds.LegacyContactInfo.random(rng.random()); + dupe.id = data.id; + try gossip_peers.append(data); + try gossip_peers.append(dupe); + + var active_set = ActiveSet.init(alloc); + defer active_set.deinit(); + try active_set.rotate(gossip_peers.items); +} From f155bf1c2bfd53dd11736741d02c3dbc98bdeac2 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Thu, 7 Dec 2023 11:18:36 -0500 Subject: [PATCH 66/72] remove peers and len fields from ActiveSet this is the alternate solution for the double free bug fix. --- src/gossip/active_set.zig | 35 ++++++++++++++++------------------- src/gossip/gossip_service.zig | 9 +++++---- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index 2e0956d06..2f6f83186 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -28,9 +28,7 @@ const BLOOM_MAX_BITS: usize = 1024 * 8 * 4; pub const ActiveSet = struct { // store pubkeys as keys in crds table bc the data can change - peers: [NUM_ACTIVE_SET_ENTRIES]Pubkey, pruned_peers: std.AutoHashMap(Pubkey, Bloom), - len: u8 = 0, allocator: std.mem.Allocator, const Self = @This(); @@ -38,16 +36,19 @@ pub const ActiveSet = struct { pub fn init( allocator: std.mem.Allocator, ) Self { - return Self{ .peers = undefined, .pruned_peers = std.AutoHashMap(Pubkey, Bloom).init(allocator), .len = 0, .allocator = allocator }; + return Self{ .pruned_peers = std.AutoHashMap(Pubkey, Bloom).init(allocator), .allocator = allocator }; } pub fn deinit(self: *Self) void { - for (self.peers[0..self.len]) |peer| { - var entry = self.pruned_peers.getEntry(peer).?; + var iter = 
self.pruned_peers.iterator(); + while (iter.next()) |entry| { entry.value_ptr.deinit(); } self.pruned_peers.deinit(); - self.len = 0; + } + + pub inline fn len(self: *const Self) u32 { + return self.pruned_peers.unmanaged.size; } pub fn rotate( @@ -55,11 +56,10 @@ pub const ActiveSet = struct { crds_peers: []crds.LegacyContactInfo, ) error{OutOfMemory}!void { // clear the existing - for (self.peers[0..self.len]) |peer| { - var entry = self.pruned_peers.getEntry(peer).?; + var iter = self.pruned_peers.iterator(); + while (iter.next()) |entry| { entry.value_ptr.deinit(); } - self.len = 0; self.pruned_peers.clearRetainingCapacity(); if (crds_peers.len == 0) { @@ -70,12 +70,10 @@ pub const ActiveSet = struct { pull_request.shuffleFirstN(rng.random(), crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); - var tgt: u8 = 0; for (0..size) |src| { if (self.pruned_peers.contains(crds_peers[src].id)) { continue; } - self.peers[tgt] = crds_peers[src].id; // *full* hard restart on blooms -- labs doesnt do this - bug? 
var bloom = try Bloom.random( @@ -84,10 +82,8 @@ pub const ActiveSet = struct { BLOOM_FALSE_RATE, BLOOM_MAX_BITS, ); - try self.pruned_peers.put(self.peers[tgt], bloom); - tgt += 1; + try self.pruned_peers.put(crds_peers[src].id, bloom); } - self.len = tgt; } pub fn prune(self: *Self, from: Pubkey, origin: Pubkey) void { @@ -111,17 +107,17 @@ pub const ActiveSet = struct { errdefer active_set_endpoints.deinit(); // change to while loop - for (self.peers[0..self.len]) |peer_pubkey| { + var iter = self.pruned_peers.iterator(); + while (iter.next()) |entry| { // lookup peer contact info const peer_info = crds_table.get(crds.CrdsValueLabel{ - .LegacyContactInfo = peer_pubkey, + .LegacyContactInfo = entry.key_ptr.*, }) orelse continue; // peer pubkey could have been removed from the crds table const peer_gossip_addr = peer_info.value.data.LegacyContactInfo.gossip; crds.sanitizeSocket(&peer_gossip_addr) catch continue; // check if peer has been pruned - const entry = self.pruned_peers.getEntry(peer_pubkey) orelse unreachable; const origin_bytes = origin.data; if (entry.value_ptr.contains(&origin_bytes)) { continue; @@ -166,7 +162,7 @@ test "gossip.active_set: init/deinit" { defer active_set.deinit(); try active_set.rotate(gossip_peers.items); - try std.testing.expect(active_set.len == CRDS_GOSSIP_PUSH_FANOUT); + try std.testing.expect(active_set.len() == CRDS_GOSSIP_PUSH_FANOUT); const origin = Pubkey.random(rng.random(), .{}); @@ -175,7 +171,8 @@ test "gossip.active_set: init/deinit" { const no_prune_fanout_len = fanout.items.len; try std.testing.expect(no_prune_fanout_len > 0); - const peer_pubkey = active_set.peers[0]; + var iter = active_set.pruned_peers.keyIterator(); + const peer_pubkey = iter.next().?.*; active_set.prune(peer_pubkey, origin); var fanout_with_prune = try active_set.getFanoutPeers(alloc, origin, &crds_table); diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 576d1e825..493bd271f 100644 --- 
a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -796,7 +796,7 @@ pub const GossipService = struct { var active_set: *const ActiveSet = active_set_lock.get(); defer active_set_lock.unlock(); - if (active_set.len == 0) return null; + if (active_set.len() == 0) return null; for (crds_entries) |entry| { const value = entry.value; @@ -1861,8 +1861,9 @@ test "gossip.gossip_service: tests handle_prune_messages" { var as_lock = gossip_service.active_set_rw.read(); var as: *const ActiveSet = as_lock.get(); - try std.testing.expect(as.len > 0); // FIX - var peer0 = as.peers[0]; + try std.testing.expect(as.len() > 0); // FIX + var iter = as.pruned_peers.keyIterator(); + const peer0 = iter.next().?.*; as_lock.unlock(); var prunes = [_]Pubkey{Pubkey.random(rng.random(), .{})}; @@ -2221,7 +2222,7 @@ test "gossip.gossip_service: test build_push_messages" { var as: *ActiveSet = as_lock.mut(); try as.rotate(peers.items); as_lock.unlock(); - try std.testing.expect(as.len > 0); + try std.testing.expect(as.len() > 0); } { From 2b10f6ea4cadc7b668aa8ed0740de736bf5903f2 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Thu, 7 Dec 2023 12:11:18 -0500 Subject: [PATCH 67/72] fix(gossip): address review in double free pr #55 - remove comment - use HashMap.count for ActiveSet.len - use getOrPut to avoid repeated lookups in ActiveSet.rotate --- src/gossip/active_set.zig | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/gossip/active_set.zig b/src/gossip/active_set.zig index 2f6f83186..2b634d4f7 100644 --- a/src/gossip/active_set.zig +++ b/src/gossip/active_set.zig @@ -47,8 +47,8 @@ pub const ActiveSet = struct { self.pruned_peers.deinit(); } - pub inline fn len(self: *const Self) u32 { - return self.pruned_peers.unmanaged.size; + pub fn len(self: *const Self) u32 { + return self.pruned_peers.count(); } pub fn rotate( @@ -70,19 +70,18 @@ pub const ActiveSet = struct { pull_request.shuffleFirstN(rng.random(), 
crds.LegacyContactInfo, crds_peers, size); const bloom_num_items = @max(crds_peers.len, MIN_NUM_BLOOM_ITEMS); - for (0..size) |src| { - if (self.pruned_peers.contains(crds_peers[src].id)) { - continue; + for (0..size) |i| { + var entry = try self.pruned_peers.getOrPut(crds_peers[i].id); + if (entry.found_existing == false) { + // *full* hard restart on blooms -- labs doesnt do this - bug? + var bloom = try Bloom.random( + self.allocator, + bloom_num_items, + BLOOM_FALSE_RATE, + BLOOM_MAX_BITS, + ); + entry.value_ptr.* = bloom; } - - // *full* hard restart on blooms -- labs doesnt do this - bug? - var bloom = try Bloom.random( - self.allocator, - bloom_num_items, - BLOOM_FALSE_RATE, - BLOOM_MAX_BITS, - ); - try self.pruned_peers.put(crds_peers[src].id, bloom); } } @@ -180,9 +179,6 @@ test "gossip.active_set: init/deinit" { try std.testing.expectEqual(no_prune_fanout_len, fanout_with_prune.items.len + 1); } -// This used to cause a double free when rotating after duplicate ids were inserted -// because there were two entries in the array but only one entry in the hashmap. -// Now the logic prevents duplicates, and this test prevents regressions. 
test "gossip.active_set: gracefully rotates with duplicate contact ids" { var alloc = std.testing.allocator; From 06bb096ebc7a8a2807c5949ccade00cdb220af74 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 8 Dec 2023 17:01:23 -0500 Subject: [PATCH 68/72] improve task allocation/using threadpool code --- src/gossip/crds_table.zig | 24 +++++++-------------- src/gossip/gossip_service.zig | 40 +++++++++++++++-------------------- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index fd71a0100..62f734748 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -635,38 +635,30 @@ pub const CrdsTable = struct { const cutoff_timestamp = now -| timeout; const n_pubkeys = self.pubkey_to_values.count(); - var tasks = try std.ArrayList(*GetOldLabelsTask).initCapacity(self.allocator, n_pubkeys); + var tasks = try self.allocator.alloc(GetOldLabelsTask, n_pubkeys); defer { - for (tasks.items) |task| { - task.deinit(); - self.allocator.destroy(task); - } - tasks.deinit(); + for (tasks) |*task| task.deinit(); + self.allocator.free(tasks); } // run this loop in parallel - for (self.pubkey_to_values.keys()[0..n_pubkeys]) |key| { + for (self.pubkey_to_values.keys()[0..n_pubkeys], 0..n_pubkeys) |key, i| { var old_labels = std.ArrayList(CrdsValueLabel).init(self.allocator); - var task = GetOldLabelsTask{ + tasks[i] = GetOldLabelsTask{ .key = key, .crds_table = self, .cutoff_timestamp = cutoff_timestamp, .old_labels = old_labels, }; - // alloc on heap - var task_heap = try self.allocator.create(GetOldLabelsTask); - task_heap.* = task; - tasks.appendAssumeCapacity(task_heap); - // run it - const batch = Batch.from(&task_heap.task); + const batch = Batch.from(&tasks[i].task); ThreadPool.schedule(self.thread_pool, batch); } // wait for them to be done to release the lock var output_length: u64 = 0; - for (tasks.items) |task| { + for (tasks) |*task| { while 
(!task.done.load(std.atomic.Ordering.Acquire)) { // wait } @@ -675,7 +667,7 @@ pub const CrdsTable = struct { // move labels to one big array var output = try std.ArrayList(CrdsValueLabel).initCapacity(self.allocator, output_length); - for (tasks.items) |task| { + for (tasks) |*task| { output.appendSliceAssumeCapacity(task.old_labels.items); } diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 493bd271f..34bcff703 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -352,23 +352,18 @@ pub const GossipService = struct { /// and verifing they have valid values, and have valid signatures. /// Verified Protocol messages are then sent to the verified_channel. fn verifyPackets(self: *Self) !void { - var tasks: [socket_utils.PACKETS_PER_BATCH]*VerifyMessageTask = undefined; + var tasks = try self.allocator.alloc(VerifyMessageTask, socket_utils.PACKETS_PER_BATCH); + defer self.allocator.free(tasks); + // pre-allocate all the tasks - for (0..tasks.len) |i| { - var verify_task_heap = try self.allocator.create(VerifyMessageTask); - verify_task_heap.* = VerifyMessageTask{ + for (tasks) |*task| { + task.* = VerifyMessageTask{ .task = .{ .callback = VerifyMessageTask.callback }, .allocator = self.allocator, .verified_incoming_channel = self.verified_incoming_channel, .packet = &Packet.default(), .logger = self.logger, }; - tasks[i] = verify_task_heap; - } - defer { - for (tasks) |task| { - self.allocator.destroy(task); - } } while (!self.exit.load(std.atomic.Ordering.Unordered)) { @@ -389,7 +384,7 @@ pub const GossipService = struct { var count: usize = 0; for (packet_batches) |*packet_batch| { for (packet_batch.items) |*packet| { - var task = tasks[count % socket_utils.PACKETS_PER_BATCH]; + var task = &tasks[count % socket_utils.PACKETS_PER_BATCH]; if (count >= socket_utils.PACKETS_PER_BATCH) { task.awaitAndReset(); } @@ -402,7 +397,7 @@ pub const GossipService = struct { } } - for (tasks[0..@min(count, 
socket_utils.PACKETS_PER_BATCH)]) |task| { + for (tasks[0..@min(count, socket_utils.PACKETS_PER_BATCH)]) |*task| { task.awaitAndReset(); } } @@ -1107,12 +1102,11 @@ pub const GossipService = struct { // create the pull requests const n_valid_requests = valid_indexs.items.len; - var tasks = try ArrayList(*PullRequestTask).initCapacity(self.allocator, n_valid_requests); + + var tasks = try self.allocator.alloc(PullRequestTask, n_valid_requests); defer { - for (tasks.items) |task| { - self.allocator.destroy(task); - } - tasks.deinit(); + for (tasks) |*task| task.deinit(); + self.allocator.free(tasks); } { @@ -1122,10 +1116,10 @@ pub const GossipService = struct { var output_limit = std.atomic.Atomic(i64).init(MAX_NUM_CRDS_VALUES_PULL_RESPONSE); + var task_index: usize = 0; for (valid_indexs.items) |i| { // create the thread task - var task_heap = try self.allocator.create(PullRequestTask); - task_heap.* = PullRequestTask{ + tasks[task_index] = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, .my_pubkey = &self.my_pubkey, .from_endpoint = &pull_requests.items[i].from_endpoint, @@ -1136,22 +1130,22 @@ pub const GossipService = struct { .allocator = self.allocator, .output_limit = &output_limit, }; - tasks.appendAssumeCapacity(task_heap); + task_index += 1; // run it - const batch = Batch.from(&task_heap.task); + const batch = Batch.from(&tasks[task_index].task); ThreadPool.schedule(self.thread_pool, batch); } // wait for them to be done to release the lock - for (tasks.items) |task| { + for (tasks) |*task| { while (!task.done.load(std.atomic.Ordering.Acquire)) { // wait } } } - for (tasks.items) |task| { + for (tasks) |*task| { if (task.output.items.len > 0) { // TODO: should only need one mux lock in this loop try self.packet_outgoing_channel.send(task.output); From d01fdeddf4aa889b187de2aae1cbd487459a73ef Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 8 Dec 2023 17:15:58 -0500 Subject: [PATCH 69/72] small cleanups on the prev 
commit --- src/gossip/crds_table.zig | 2 +- src/gossip/gossip_service.zig | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 62f734748..8638af861 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -642,7 +642,7 @@ pub const CrdsTable = struct { } // run this loop in parallel - for (self.pubkey_to_values.keys()[0..n_pubkeys], 0..n_pubkeys) |key, i| { + for (self.pubkey_to_values.keys()[0..n_pubkeys], 0..) |key, i| { var old_labels = std.ArrayList(CrdsValueLabel).init(self.allocator); tasks[i] = GetOldLabelsTask{ .key = key, diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index 34bcff703..d2b8a9722 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -1116,8 +1116,7 @@ pub const GossipService = struct { var output_limit = std.atomic.Atomic(i64).init(MAX_NUM_CRDS_VALUES_PULL_RESPONSE); - var task_index: usize = 0; - for (valid_indexs.items) |i| { + for (valid_indexs.items, 0..) 
|i, task_index| { // create the thread task tasks[task_index] = PullRequestTask{ .task = .{ .callback = PullRequestTask.callback }, @@ -1130,7 +1129,6 @@ pub const GossipService = struct { .allocator = self.allocator, .output_limit = &output_limit, }; - task_index += 1; // run it const batch = Batch.from(&tasks[task_index].task); From 9ead905e96c9b119c69cd7df2f3fd851b9a346d6 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Wed, 13 Dec 2023 12:56:02 -0500 Subject: [PATCH 70/72] incorp comments --- src/gossip/fuzz.zig | 10 ++++---- src/gossip/gossip_service.zig | 45 ++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/gossip/fuzz.zig b/src/gossip/fuzz.zig index 9b134a1a3..d44021891 100644 --- a/src/gossip/fuzz.zig +++ b/src/gossip/fuzz.zig @@ -8,7 +8,7 @@ const socket_utils = @import("./socket_utils.zig"); const _gossip_service = @import("./gossip_service.zig"); const GossipService = _gossip_service.GossipService; const ChunkType = _gossip_service.ChunkType; -const crds_values_to_packets = _gossip_service.crdsValuesToPackets; +const crdsValuesToPackets = _gossip_service.crdsValuesToPackets; const MAX_PUSH_MESSAGE_PAYLOAD_SIZE = _gossip_service.MAX_PUSH_MESSAGE_PAYLOAD_SIZE; const Logger = @import("../trace/log.zig").Logger; @@ -119,14 +119,14 @@ pub fn randomPushMessage(rng: std.rand.Random, keypair: *const KeyPair, to_addr: } const allocator = std.heap.page_allocator; - const packets = try crds_values_to_packets( + const packets = try crdsValuesToPackets( allocator, &Pubkey.fromPublicKey(&keypair.public_key, false), &crds_values, &to_addr, ChunkType.PushMessage, ); - return packets.?; + return packets; } pub fn randomPullResponse(rng: std.rand.Random, keypair: *const KeyPair, to_addr: EndPoint) !std.ArrayList(Packet) { @@ -139,14 +139,14 @@ pub fn randomPullResponse(rng: std.rand.Random, keypair: *const KeyPair, to_addr } const allocator = std.heap.page_allocator; - const packets = try 
crds_values_to_packets( + const packets = try crdsValuesToPackets( allocator, &Pubkey.fromPublicKey(&keypair.public_key, false), &crds_values, &to_addr, ChunkType.PullResponse, ); - return packets.?; + return packets; } pub fn randomPullRequest(allocator: std.mem.Allocator, rng: std.rand.Random, keypair: *const KeyPair, to_addr: EndPoint) !Packet { diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index d2b8a9722..a34efbc04 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -148,7 +148,7 @@ pub const GossipService = struct { } var thread_pool = try allocator.create(ThreadPool); - var n_threads = @min(@as(u32, @truncate(std.Thread.getCpuCount() catch 0)), 8); + var n_threads = @min(@as(u32, @truncate(std.Thread.getCpuCount() catch 1)), 8); thread_pool.* = ThreadPool.init(.{ .max_threads = n_threads, .stack_size = 2 * 1024 * 1024, @@ -736,8 +736,8 @@ pub const GossipService = struct { defer valid_gossip_indexs.deinit(); var valid_gossip_peers: [NUM_ACTIVE_SET_ENTRIES]LegacyContactInfo = undefined; - for (valid_gossip_indexs.items) |i| { - valid_gossip_peers[i] = gossip_peers[i]; + for (0.., valid_gossip_indexs.items) |i, valid_gossip_index| { + valid_gossip_peers[i] = gossip_peers[valid_gossip_index]; } // send pings to peers @@ -754,7 +754,7 @@ pub const GossipService = struct { /// logic for building new push messages which are sent to peers from the /// active set and serialized into packets. - fn buildPushMessages(self: *Self, push_cursor: *u64) !?ArrayList(ArrayList(Packet)) { + fn buildPushMessages(self: *Self, push_cursor: *u64) !ArrayList(ArrayList(Packet)) { // TODO: find a better static value? 
var buf: [512]crds.CrdsVersionedValue = undefined; @@ -766,8 +766,11 @@ pub const GossipService = struct { break :blk crds_table.getEntriesWithCursor(&buf, push_cursor); }; + var packet_batch = ArrayList(ArrayList(Packet)).init(self.allocator); + errdefer packet_batch.deinit(); + if (crds_entries.len == 0) { - return null; + return packet_batch; } const now = getWallclockMs(); @@ -791,7 +794,7 @@ pub const GossipService = struct { var active_set: *const ActiveSet = active_set_lock.get(); defer active_set_lock.unlock(); - if (active_set.len() == 0) return null; + if (active_set.len() == 0) return packet_batch; for (crds_entries) |entry| { const value = entry.value; @@ -841,24 +844,21 @@ pub const GossipService = struct { const num_values_not_considered = crds_entries.len - num_values_considered; push_cursor.* -= num_values_not_considered; - var packet_batch = ArrayList(ArrayList(Packet)).init(self.allocator); - errdefer packet_batch.deinit(); - var push_iter = push_messages.iterator(); while (push_iter.next()) |push_entry| { const crds_values: *const ArrayList(CrdsValue) = push_entry.value_ptr; const to_endpoint: *const EndPoint = push_entry.key_ptr; // send the values as a pull response - var maybe_endpoint_packets = try crdsValuesToPackets( + var packets = try crdsValuesToPackets( self.allocator, &self.my_pubkey, crds_values.items, to_endpoint, ChunkType.PushMessage, ); - if (maybe_endpoint_packets) |endpoint_packets| { - try packet_batch.append(endpoint_packets); + if (packets.items.len > 0) { + try packet_batch.append(packets); } } return packet_batch; @@ -1041,19 +1041,19 @@ pub const GossipService = struct { std.atomic.Ordering.Release, ); - const maybe_packets = crdsValuesToPackets( + const packets = crdsValuesToPackets( self.allocator, self.my_pubkey, response_crds_values.items, self.from_endpoint, ChunkType.PullResponse, - ) catch { - return; - }; + ) catch return; - if (maybe_packets) |*packets| { + if (packets.items.len > 0) { defer packets.deinit(); - 
self.output.appendSlice(packets.items) catch unreachable; + self.output.appendSlice(packets.items) catch { + std.debug.panic("thread task: failed to append packets", .{}); + }; } } }; @@ -1686,8 +1686,9 @@ pub fn crdsValuesToPackets( crds_values: []CrdsValue, to_endpoint: *const EndPoint, chunk_type: ChunkType, -) error{ OutOfMemory, SerializationError }!?ArrayList(Packet) { - if (crds_values.len == 0) return null; +) error{ OutOfMemory, SerializationError }!ArrayList(Packet) { + if (crds_values.len == 0) + return ArrayList(Packet).init(allocator); const indexs = try chunkValuesIntoPacketIndexs( allocator, @@ -2230,7 +2231,7 @@ test "gossip.gossip_service: test build_push_messages" { clg.unlock(); var cursor: u64 = 0; - var msgs = (try gossip_service.buildPushMessages(&cursor)).?; + var msgs = try gossip_service.buildPushMessages(&cursor); try std.testing.expectEqual(cursor, 11); try std.testing.expect(msgs.items.len > 0); for (msgs.items) |*msg| msg.deinit(); @@ -2238,7 +2239,7 @@ test "gossip.gossip_service: test build_push_messages" { var msgs2 = try gossip_service.buildPushMessages(&cursor); try std.testing.expectEqual(cursor, 11); - try std.testing.expect(msgs2 == null); + try std.testing.expect(msgs2.items.len == 0); } test "gossip.gossip_service: test packet verification" { From 28f58444576fb4017b3263511f7f376f144dec90 Mon Sep 17 00:00:00 2001 From: ultd Date: Fri, 15 Dec 2023 15:05:15 -0800 Subject: [PATCH 71/72] make lru locking or non-locking via comptime --- src/common/lru.zig | 59 ++++++++++++++++++++++++++++++++++++++-- src/gossip/ping_pong.zig | 12 ++++---- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index 33d82f434..c5529361f 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -3,10 +3,17 @@ const Allocator = std.mem.Allocator; const TailQueue = std.TailQueue; const testing = std.testing; const assert = std.debug.assert; +const Mutex = std.Thread.Mutex; + +pub const Kind = enum { + 
locking, + non_locking, +}; // TODO: allow for passing custom hash context to use in std.ArrayHashMap for performance. -pub fn LruCache(comptime K: type, comptime V: type) type { +pub fn LruCache(comptime kind: Kind, comptime K: type, comptime V: type) type { return struct { + mux: if (kind == .locking) Mutex else void, allocator: Allocator, hashmap: if (K == []const u8) std.StringArrayHashMap(*Node) else std.AutoArrayHashMap(K, *Node), dbl_link_list: TailQueue(LruEntry), @@ -50,6 +57,7 @@ pub fn LruCache(comptime K: type, comptime V: type) type { .hashmap = hashmap, .dbl_link_list = TailQueue(LruEntry){}, .max_items = max_items, + .mux = if (kind == .locking) Mutex{} else undefined, }; // pre allocate enough capacity for max items since we will use @@ -111,6 +119,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Inserts key/value if key doesn't exist, updates only value if it does. /// In any case, it will affect cache ordering. pub fn insert(self: *Self, key: K, value: V) error{OutOfMemory}!void { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + _ = self.internal_insert(key, value); return; } @@ -118,11 +131,21 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Whether or not contains key. /// NOTE: doesn't affect cache ordering. 
pub fn contains(self: *Self, key: K) bool { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + return self.hashmap.contains(key); } /// Most recently used entry pub fn mru(self: *Self) ?LruEntry { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.dbl_link_list.last) |node| { return node.data; } @@ -131,6 +154,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Least recently used entry pub fn lru(self: *Self) ?LruEntry { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.dbl_link_list.first) |node| { return node.data; } @@ -145,6 +173,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Gets value associated with key if exists pub fn get(self: *Self, key: K) ?V { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.hashmap.get(key)) |node| { self.dbl_link_list.remove(node); self.dbl_link_list.append(node); @@ -154,6 +187,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { } pub fn pop(self: *Self, k: K) ?V { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.hashmap.fetchSwapRemove(k)) |kv| { var node = kv.value; self.dbl_link_list.remove(node); @@ -164,6 +202,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { } pub fn peek(self: *Self, key: K) ?V { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.hashmap.get(key)) |node| { return node.data.value; } @@ -174,6 +217,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Puts a key-value pair into cache. If the key already exists in the cache, then it updates /// the key's value and returns the old value. Otherwise, `null` is returned. 
pub fn put(self: *Self, key: K, value: V) ?V { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.hashmap.getEntry(key)) |existing_entry| { var existing_node: *Node = existing_entry.value_ptr.*; var old_value = existing_node.data.value; @@ -188,6 +236,11 @@ pub fn LruCache(comptime K: type, comptime V: type) type { /// Removes key from cache. Returns true if found, false if not. pub fn remove(self: *Self, key: K) bool { + if (kind == .locking) { + self.mux.lock(); + defer self.mux.unlock(); + } + if (self.hashmap.fetchSwapRemove(key)) |kv| { var node = kv.value; self.dbl_link_list.remove(node); @@ -200,7 +253,7 @@ pub fn LruCache(comptime K: type, comptime V: type) type { } test "common.lru: LruCache state is correct" { - var cache = try LruCache(u64, []const u8).init(testing.allocator, 4); + var cache = try LruCache(.locking, u64, []const u8).init(testing.allocator, 4); defer cache.deinit(); try cache.insert(1, "one"); @@ -232,7 +285,7 @@ test "common.lru: LruCache state is correct" { } test "common.lru: put works as expected" { - var cache = try LruCache([]const u8, usize).init(testing.allocator, 4); + var cache = try LruCache(.non_locking, []const u8, usize).init(testing.allocator, 4); defer cache.deinit(); try cache.insert("a", 1); diff --git a/src/gossip/ping_pong.zig b/src/gossip/ping_pong.zig index 9caf87bcc..093134727 100644 --- a/src/gossip/ping_pong.zig +++ b/src/gossip/ping_pong.zig @@ -112,12 +112,12 @@ pub const PingCache = struct { rate_limit_delay_ns: u64, // Timestamp of last ping message sent to a remote node. // Used to rate limit pings to remote nodes. - pings: LruCache(PubkeyAndSocketAddr, Instant), + pings: LruCache(.non_locking, PubkeyAndSocketAddr, Instant), // Verified pong responses from remote nodes. - pongs: LruCache(PubkeyAndSocketAddr, Instant), + pongs: LruCache(.non_locking, PubkeyAndSocketAddr, Instant), // Hash of ping tokens sent out to remote nodes, // pending a pong response back. 
- pending_cache: LruCache(Hash, PubkeyAndSocketAddr), + pending_cache: LruCache(.non_locking, Hash, PubkeyAndSocketAddr), // allocator allocator: std.mem.Allocator, @@ -133,9 +133,9 @@ pub const PingCache = struct { return Self{ .ttl_ns = ttl_ns, .rate_limit_delay_ns = rate_limit_delay_ns, - .pings = try LruCache(PubkeyAndSocketAddr, Instant).init(allocator, cache_capacity), - .pongs = try LruCache(PubkeyAndSocketAddr, Instant).init(allocator, cache_capacity), - .pending_cache = try LruCache(Hash, PubkeyAndSocketAddr).init(allocator, cache_capacity), + .pings = try LruCache(.non_locking, PubkeyAndSocketAddr, Instant).init(allocator, cache_capacity), + .pongs = try LruCache(.non_locking, PubkeyAndSocketAddr, Instant).init(allocator, cache_capacity), + .pending_cache = try LruCache(.non_locking, Hash, PubkeyAndSocketAddr).init(allocator, cache_capacity), .allocator = allocator, }; } From 1c3c02a5ebd3a014efc4fdf9336a1796ba7dfd82 Mon Sep 17 00:00:00 2001 From: x19 <0x39015319@gmail.com> Date: Fri, 15 Dec 2023 18:26:35 -0500 Subject: [PATCH 72/72] update threadpool api --- src/gossip/crds_table.zig | 2 +- src/gossip/gossip_service.zig | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gossip/crds_table.zig b/src/gossip/crds_table.zig index 8638af861..d680ab2a5 100644 --- a/src/gossip/crds_table.zig +++ b/src/gossip/crds_table.zig @@ -653,7 +653,7 @@ pub const CrdsTable = struct { // run it const batch = Batch.from(&tasks[i].task); - ThreadPool.schedule(self.thread_pool, batch); + self.thread_pool.schedule(batch); } // wait for them to be done to release the lock diff --git a/src/gossip/gossip_service.zig b/src/gossip/gossip_service.zig index a34efbc04..fb3d6a83b 100644 --- a/src/gossip/gossip_service.zig +++ b/src/gossip/gossip_service.zig @@ -391,7 +391,7 @@ pub const GossipService = struct { task.packet = packet; const batch = Batch.from(&task.task); - ThreadPool.schedule(self.thread_pool, batch); + self.thread_pool.schedule(batch); count += 1; 
} @@ -1132,7 +1132,7 @@ pub const GossipService = struct { // run it const batch = Batch.from(&tasks[task_index].task); - ThreadPool.schedule(self.thread_pool, batch); + self.thread_pool.schedule(batch); } // wait for them to be done to release the lock