From 025158e242801650da23eefb903d68d0fbe95d56 Mon Sep 17 00:00:00 2001 From: ultd Date: Tue, 23 Jan 2024 20:10:59 -0600 Subject: [PATCH] resolved conflicts --- build.zig | 12 + c/swissmap/.gitignore | 5 + c/swissmap/LICENSE | 201 +++++++++++++++ c/swissmap/Makefile | 28 +++ c/swissmap/README.md | 13 + c/swissmap/example.zig | 40 +++ c/swissmap/include/hashmap.h | 150 +++++++++++ c/swissmap/include/main.h | 15 ++ c/swissmap/lib/hashmap.c | 476 +++++++++++++++++++++++++++++++++++ c/swissmap/src/main.c | 102 ++++++++ src/core/accounts_db.zig | 5 + 11 files changed, 1047 insertions(+) create mode 100644 c/swissmap/.gitignore create mode 100644 c/swissmap/LICENSE create mode 100644 c/swissmap/Makefile create mode 100644 c/swissmap/README.md create mode 100644 c/swissmap/example.zig create mode 100644 c/swissmap/include/hashmap.h create mode 100644 c/swissmap/include/main.h create mode 100644 c/swissmap/lib/hashmap.c create mode 100644 c/swissmap/src/main.c diff --git a/build.zig b/build.zig index a1de46239..8de728303 100644 --- a/build.zig +++ b/build.zig @@ -67,6 +67,10 @@ pub fn build(b: *std.Build) void { lib.addModule("getty", getty_mod); lib.addModule("httpz", httpz_mod); + lib.linkLibC(); + lib.addCSourceFiles(&.{"c/swissmap/lib/hashmap.c"}, &.{ "-g", "-O3" }); + lib.addIncludePath(.{ .path = "c/swissmap/include" }); + // This declares intent for the library to be installed into the standard // location when the user invokes the "install" step (the default step when // running `zig build`). 
@@ -85,6 +89,10 @@ pub fn build(b: *std.Build) void { tests.addModule("getty", getty_mod); tests.addModule("httpz", httpz_mod); + tests.linkLibC(); + tests.addCSourceFiles(&.{"c/swissmap/lib/hashmap.c"}, &.{ "-g", "-O3" }); + tests.addIncludePath(.{ .path = "c/swissmap/include" }); + const run_tests = b.addRunArtifact(tests); const test_step = b.step("test", "Run library tests"); test_step.dependOn(&lib.step); @@ -104,6 +112,10 @@ pub fn build(b: *std.Build) void { exe.addModule("getty", getty_mod); exe.addModule("httpz", httpz_mod); + exe.linkLibC(); + exe.addCSourceFiles(&.{"c/swissmap/lib/hashmap.c"}, &.{ "-g", "-O3" }); + exe.addIncludePath(.{ .path = "c/swissmap/include" }); + // This declares intent for the executable to be installed into the // standard location when the user invokes the "install" step (the default // step when running `zig build`). diff --git a/c/swissmap/.gitignore b/c/swissmap/.gitignore new file mode 100644 index 000000000..3d9d2816a --- /dev/null +++ b/c/swissmap/.gitignore @@ -0,0 +1,5 @@ +.ccls-cache +hashmap +lib/hash.o +lib/hashmap.o +src/main.o diff --git a/c/swissmap/LICENSE b/c/swissmap/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/c/swissmap/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/c/swissmap/Makefile b/c/swissmap/Makefile new file mode 100644 index 000000000..c90db2006 --- /dev/null +++ b/c/swissmap/Makefile @@ -0,0 +1,28 @@ +CC = gcc + +CFLAGS = -Wall -Werror -w -std=c11 -march=native -O3 + +BIN = hashmap + +SRC = src/main.c \ + lib/hash.c \ + lib/hashmap.c + +OBJS = src/main.o \ + lib/hash.o \ + lib/hashmap.o + +.SUFFIXES: .o .c + +$(BIN): $(OBJS) + $(CC) $(OBJS) $(CFLAGS) -o $(BIN) + +.c.o: + $(CC) -Iinclude -c $< -o $@ $(CFLAGS) + + +run: $(BIN) + @$(MAKE) && ./$(BIN) + +clean: + rm -f $(OBJS) $(BIN) diff --git a/c/swissmap/README.md b/c/swissmap/README.md new file mode 100644 index 000000000..b38f52671 --- /dev/null +++ b/c/swissmap/README.md @@ -0,0 +1,13 @@ +# Swissmap + +This library is a C port of Google's high-performance SwissTable hashmap. 
+ +Based on [CppCon 2017: Matt Kulukundis “Designing a Fast, Efficient, Cache-friendly Hash Table, Step by Step”](https://www.youtube.com/watch?v=ncHmEUmJZf4) + +Requirements: SSE2 + +Hash function used for testing purposes: [DJB2](http://www.cse.yorku.ca/~oz/hash.html) + +# License + +Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) diff --git a/c/swissmap/example.zig b/c/swissmap/example.zig new file mode 100644 index 000000000..6521420b8 --- /dev/null +++ b/c/swissmap/example.zig @@ -0,0 +1,40 @@ +const std = @import("std"); +const Atomic = std.atomic.Atomic; +const json = std.json; +var gpa = std.heap.GeneralPurposeAllocator(.{}){}; +const gpa_allocator = gpa.allocator(); +const c = @cImport({ + @cInclude("hashmap.h"); +}); + +/// Demonstrates insert, find and remove on a swissmap created with `hm_new_managed`. +/// The key and value buffers come from `std.c.malloc` and are released by the `defer`s below. +pub fn main() !void { + var map = c.hm_new_managed(2); + + // we allocate with std.c.malloc allocator as hm_map_t uses free on values + var key = @as([*]u8, @ptrCast(std.c.malloc(6) orelse unreachable)); + defer std.c.free(key); + // copy the terminating NUL as well: the managed map hashes and compares keys as C strings + @memcpy(key, "hello\x00"); + + var value = @as([*]u8, @ptrCast(std.c.malloc(6) orelse unreachable)); + defer std.c.free(value); + // the value is printed below as a NUL-terminated string, so terminate it too + @memcpy(value, "there\x00"); + + var null_str = [_]u8{ 'n', 'u', 'l', 'l', 0 }; + var null_str_c: [*]u8 = &null_str; + + // insert values + c.hm_insert(&map, key, value); + // print only the 5 text bytes, not the terminator + std.debug.print("inserted key = {s} \n", .{key[0..5]}); + + // find values, cast into specific type if needed + var matched_idx: usize = 0; + var val = c.hm_find(map, key, &matched_idx); + std.debug.print("found key at index = {any}, val = {s}\n", .{ matched_idx, if (val) |v| v else null_str_c }); + + var buff = [_]u8{0} ** 24; + var matched_key: [*c]u8 = @as([*c]u8, buff[0..]); + + // remove values + var val_removed = c.hm_remove(map, key, &matched_key); + std.debug.print("removed key with value = {s} \n", .{@as([*:0]u8, val_removed)}); +} diff --git a/c/swissmap/include/hashmap.h b/c/swissmap/include/hashmap.h new file mode 100644 index 000000000..d0fb9063f --- /dev/null +++ 
b/c/swissmap/include/hashmap.h @@ -0,0 +1,150 @@ +#ifndef HM_H +#define HM_H + +#include +#include +#include +#include + +#ifdef __aarch64__ +#include +#else +#include +#endif + +#ifndef HM_DEFAULT_N_GROUPS +#define HM_DEFAULT_N_GROUPS (1) +#endif + +#ifndef HM_LOAD_FACTOR +#define HM_LOAD_FACTOR (0.75) +#endif + +#ifndef HM_RESIZE_FACTOR +#define HM_RESIZE_FACTOR (2) +#endif + +#define HM_GROUP_SIZE (16) +#define HM_CONTROL_SIZE (16) + +typedef char hm_key_t; +typedef char hm_value_t; + +#ifdef __aarch64__ +typedef int32x4_t hm_control_t; +#elif __x86_64__ +typedef __m128i hm_control_t; +#endif + +typedef int8_t hm_metadata_t; + +typedef size_t (*hm_hashfn_t)(hm_key_t *key); +typedef bool (*hm_cmpfn_t)(hm_key_t *key1, hm_key_t *key2); + +typedef enum +{ + HM_EMPTY = 0b10000000, + HM_DELETED = 0b11111111 +} hm_ctrl_e; + +typedef struct +{ + size_t pos; + hm_metadata_t meta; +} hm_hash_t; + +typedef struct +{ + hm_control_t _ctrl; + hm_key_t *key[HM_CONTROL_SIZE]; + hm_hash_t hash[HM_CONTROL_SIZE]; +} hm_group_t; + +typedef struct +{ + hm_group_t *groups; + hm_value_t **values; + size_t items; + size_t n_groups; + size_t sentinel; + size_t size; + + hm_hashfn_t hashfn; + hm_cmpfn_t cmpfn; +} hm_map_t; + +size_t hash_djb2(char *str); +bool str_equals(char *val1, char *val2); +static inline hm_control_t zero_lowest_n_bytes( + hm_control_t _ctrl, hm_metadata_t n) + __attribute__((always_inline)); +static inline size_t hm_pos( + size_t hash) + __attribute__((always_inline)); +static inline hm_metadata_t hm_meta( + size_t hash) + __attribute__((always_inline)); +// TODO change hm_group_pos return type to hm_metadata_t +static inline hm_metadata_t hm_group_pos( + size_t idx) + __attribute__((always_inline)); +static inline size_t hm_idx( + size_t group, hm_metadata_t group_pos) + __attribute__((always_inline)); +static inline hm_hash_t hm_hash( + hm_map_t *map, hm_key_t *key) + __attribute__((always_inline)); +static inline bool hm_should_resize( + hm_map_t *map) + 
__attribute__((always_inline)); +static inline uint16_t hm_match_full( + hm_map_t *map, size_t group) + __attribute__((always_inline)); +static inline size_t hm_group( + size_t idx) + __attribute__((always_inline)); +static inline size_t hm_sentinel_group( + hm_map_t *map) + __attribute__((always_inline)); +static inline size_t hm_last_group( + hm_map_t *map) + __attribute__((always_inline)); + +static uint16_t inline hm_probe( + hm_metadata_t meta, hm_control_t _ctrl) + __attribute__((always_inline)); +static inline uint16_t hm_probe_from( + hm_metadata_t group_pos, hm_metadata_t meta, + hm_control_t _ctrl) + __attribute__((always_inline)); +static inline bool hm_match_metadata( + hm_map_t *map, hm_metadata_t meta, size_t group, + size_t *match_idx) + __attribute__((always_inline)); +static inline bool hm_match_metadata_from( + hm_map_t *map, hm_metadata_t meta, hm_hash_t *hash, + size_t group, hm_metadata_t group_pos, size_t *match_idx) + __attribute__((always_inline)); +static inline hm_value_t *hm_find_hash( + hm_map_t *map, hm_hash_t *hash, hm_key_t *key, + size_t group, hm_metadata_t group_pos, size_t *match_idx) + __attribute__((always_inline)); + +static inline hm_map_t *hm_resize( + hm_map_t *map) + __attribute__((always_inline)); +static inline void hm_insert_at( + hm_map_t *map, size_t group, hm_metadata_t group_pos, + hm_hash_t hash, hm_key_t *key, hm_value_t *value) + __attribute__((always_inline)); + +hm_value_t *hm_find(hm_map_t *map, hm_key_t *key, size_t *match_idx); +hm_map_t *hm_new(size_t n_groups, hm_hashfn_t hashfn, hm_cmpfn_t cmpfn); +hm_map_t *hm_new_managed(size_t n_groups); +void hm_clear(hm_map_t *map); +void hm_erase(hm_map_t *map); +void hm_insert(hm_map_t **map, hm_key_t *key, hm_value_t *value); +hm_value_t *hm_remove(hm_map_t *map, hm_key_t *key, hm_key_t **match_key); +bool hm_iterate(hm_map_t *map, size_t *idx, hm_key_t **key_ref, hm_value_t **value_ref); + +#endif diff --git a/c/swissmap/include/main.h b/c/swissmap/include/main.h 
new file mode 100644 index 000000000..c0ac3963a --- /dev/null +++ b/c/swissmap/include/main.h @@ -0,0 +1,15 @@ +#ifndef MAIN_H +#define MAIN_H + +#include +#include +#include "hashmap.h" + +#define STR_CHARS \ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.-#'?!" + +char *rand_string(int length); +bool str_equals(char *val1, char *val2); +void bench_print(char *op, size_t cnt, double time_spent); + +#endif diff --git a/c/swissmap/lib/hashmap.c b/c/swissmap/lib/hashmap.c new file mode 100644 index 000000000..9baf1c19a --- /dev/null +++ b/c/swissmap/lib/hashmap.c @@ -0,0 +1,476 @@ +#include +#include + +#include "hashmap.h" + +alignas(32) static const hm_metadata_t mask[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +static hm_control_t zero_lowest_n_bytes(hm_control_t _ctrl, hm_metadata_t n) +{ +#ifdef __aarch64__ + // NEON implementation + int32x4_t _m = vld1q_s32((const int32_t *)&mask[16 - n]); + return vandq_s32(_ctrl, _m); +#else + // SSE implementation + hm_control_t _m = _mm_loadu_si128((hm_control_t *)&mask[16 - n]); + return _mm_and_si128(_ctrl, _m); +#endif +} + +static size_t hm_pos(size_t hash) +{ + return hash >> 7; +} + +static hm_metadata_t hm_meta(size_t hash) +{ + return hash & 0x7f; +} + +static size_t hm_idx(size_t group, hm_metadata_t group_pos) +{ + return (group * HM_GROUP_SIZE) + group_pos; +} + +static size_t hm_group(size_t idx) +{ + return idx / HM_GROUP_SIZE; +} + +static hm_metadata_t hm_group_pos(size_t idx) +{ + return idx % HM_GROUP_SIZE; +} + +static size_t hm_sentinel_group(hm_map_t *map) +{ + return hm_group(map->sentinel) + 1; +} + +static size_t hm_last_group(hm_map_t *map) +{ + return hm_group(map->size - 1) + 1; +} + +static hm_hash_t hm_hash(hm_map_t *map, hm_key_t *key) +{ + hm_hash_t hash; + size_t h = map->hashfn(key); + hash.pos = hm_pos(h); + hash.meta = hm_meta(h); + return hash; +} + +static bool hm_should_resize(hm_map_t 
*map) +{ + return map->items >= (HM_LOAD_FACTOR * map->size); +} + +uint16_t neon_movemask_epi8(uint8x16_t input) +{ + // Compare each byte with zero + uint8x16_t mask = vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(input), 7)); + + // Create a lookup table + const uint8x8_t bit_mask = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}; + uint8x8_t low = vand_u8(vget_low_u8(mask), bit_mask); + uint8x8_t high = vand_u8(vget_high_u8(mask), bit_mask); + + // Sum up the bits + uint16_t low_sum = vaddv_u8(low); // Sum up the lower 8 bytes + uint16_t high_sum = vaddv_u8(high); // Sum up the higher 8 bytes + + return low_sum | (high_sum << 8); +} + +static uint16_t hm_match_full(hm_map_t *map, size_t group) +{ +#ifdef __aarch64__ + return neon_movemask_epi8(map->groups[group]._ctrl); +#else + return ~(_mm_movemask_epi8(map->groups[group]._ctrl)); +#endif +} + +static uint16_t hm_probe(hm_metadata_t meta, hm_control_t _ctrl) +{ + hm_control_t _match; +#ifdef __aarch64__ + _match = vdupq_n_s8(meta); + return neon_movemask_epi8(vceqq_s8(_match, _ctrl)); +#else + _match = _mm_set1_epi8(meta); + return _mm_movemask_epi8(_mm_cmpeq_epi8(_match, _ctrl)); +#endif +} + +static uint16_t hm_probe_from(hm_metadata_t group_pos, hm_metadata_t meta, + hm_control_t _ctrl) +{ + hm_control_t _match; +#ifdef __aarch64__ + _match = vdupq_n_s8(meta); + return neon_movemask_epi8( + zero_lowest_n_bytes( + vceqq_s8(_match, _ctrl), group_pos)); +#else + _match = _mm_set1_epi8(meta); + return _mm_movemask_epi8( + zero_lowest_n_bytes( + _mm_cmpeq_epi8(_match, _ctrl), group_pos)); +#endif +} + +unsigned int trailing_zeroes(unsigned int value) +{ + if (value == 0) + { + return 32; + } +#ifdef __aarch64__ + return __builtin_ctz(value); +#else + return _tzcnt_u32(value); +#endif +} + +static bool hm_match_metadata(hm_map_t *map, hm_metadata_t meta, + size_t group, size_t *match_idx) +{ + hm_metadata_t match_group_pos = trailing_zeroes(hm_probe( + meta, map->groups[group]._ctrl)); + + *match_idx = 
hm_idx(group, match_group_pos); + return (match_group_pos < 32) ? true : false; +} + +static bool hm_match_metadata_from(hm_map_t *map, hm_metadata_t meta, + hm_hash_t *hash, size_t group, + hm_metadata_t group_pos, size_t *match_idx) +{ + hm_metadata_t match_group_pos = trailing_zeroes(hm_probe_from( + group_pos, meta, map->groups[group]._ctrl)); + + *match_idx = hm_idx(group, match_group_pos); + return (match_group_pos < 32) ? true : false; +} + +unsigned int blsr_u32(unsigned int x) +{ + return x & (x - 1); +} + +static hm_value_t *hm_find_hash(hm_map_t *map, + hm_hash_t *hash, hm_key_t *key, + size_t group, hm_metadata_t group_pos, size_t *match_idx) +{ + uint16_t matches = hm_probe_from( + group_pos, hash->meta, map->groups[group]._ctrl); + + while (matches) + { + hm_metadata_t match_group_pos = trailing_zeroes(matches); + + if (map->cmpfn(map->groups[group].key[match_group_pos], key)) + { + *match_idx = hm_idx(group, match_group_pos); + return map->values[*match_idx]; + } + + matches = blsr_u32(matches); + } + if (hm_match_metadata_from( + map, HM_EMPTY, hash, group, group_pos, match_idx)) + return NULL; + + size_t end_group = hm_sentinel_group(map); + + while (true) + { + group = (group + 1) % end_group; + + matches = hm_probe(hash->meta, map->groups[group]._ctrl); + + while (matches) + { + hm_metadata_t match_group_pos = trailing_zeroes(matches); + + if (map->cmpfn(map->groups[group].key[match_group_pos], key)) + { + *match_idx = hm_idx(group, match_group_pos); + return map->values[*match_idx]; + } + + matches = blsr_u32(matches); + } + if (hm_match_metadata(map, HM_EMPTY, group, match_idx)) + return NULL; + } +} + +hm_value_t *hm_find(hm_map_t *map, hm_key_t *key, size_t *match_idx) +{ + hm_hash_t hash = hm_hash(map, key); + size_t idx = hash.pos % map->size; + size_t group = hm_group(idx); + hm_metadata_t group_pos = hm_group_pos(idx); + return hm_find_hash(map, &hash, key, group, group_pos, match_idx); +} + +hm_map_t *hm_new_managed(size_t n_groups) +{ + 
return hm_new(n_groups, &hash_djb2, &str_equals); +} + +/* Allocates a map with n_groups groups of HM_GROUP_SIZE slots, every slot marked HM_EMPTY. With n_groups == 0 no slot storage is allocated. Returns NULL if any allocation fails, releasing whatever was already obtained. */ +hm_map_t *hm_new(size_t n_groups, hm_hashfn_t hashfn, hm_cmpfn_t cmpfn) +{ + hm_map_t *map = malloc(sizeof(hm_map_t)); + if (!map) + return NULL; + + map->n_groups = n_groups; + map->sentinel = 0; + map->size = map->n_groups * HM_GROUP_SIZE; + map->items = 0; + map->hashfn = hashfn; + map->cmpfn = cmpfn; + + if (map->size > 0) + { + map->values = malloc(map->size * sizeof(hm_value_t *)); + map->groups = malloc(map->n_groups * sizeof(hm_group_t)); + if (!map->values || !map->groups) + { + /* free(NULL) is a no-op, so this releases whichever pieces were obtained */ + free(map->values); + free(map->groups); + free(map); + return NULL; + } + + hm_control_t _empty; +#ifdef __aarch64__ + _empty = vdupq_n_s8(HM_EMPTY); +#else + _empty = _mm_set1_epi8(HM_EMPTY); +#endif + for (size_t group = 0; group < map->n_groups; group++) + { + map->groups[group]._ctrl = _empty; + } + } + else + { + map->values = NULL; + map->groups = NULL; + } + return map; +} + +/* Marks every slot up to the sentinel group as HM_EMPTY and resets the item count. Stored keys and values are not freed here. */ +void hm_clear(hm_map_t *map) +{ + hm_control_t _empty; +#ifdef __aarch64__ + _empty = vdupq_n_s8(HM_EMPTY); +#else + _empty = _mm_set1_epi8(HM_EMPTY); +#endif + /* A map created with 0 groups has groups == NULL, so there is nothing to reset. */ + size_t end_group = (map->n_groups == 0) ? 0 : hm_sentinel_group(map); + + for (size_t group = 0; group < end_group; group++) + map->groups[group]._ctrl = _empty; + map->items = 0; +} + +void hm_erase(hm_map_t *map) +{ + free(map->groups); + free(map->values); +} + +hm_map_t *hm_resize(hm_map_t *map) +{ + size_t new_n_groups = (map->n_groups == 0) + ? 
HM_DEFAULT_N_GROUPS + : HM_RESIZE_FACTOR * map->n_groups; + hm_map_t *new_map = hm_new(new_n_groups, map->hashfn, map->cmpfn); + + if (map->n_groups > 0) + { + hm_metadata_t old_group_pos; + size_t old_idx, old_group; + size_t old_end_group = hm_sentinel_group(map); + + size_t new_idx, new_group; + hm_metadata_t new_group_pos; + + hm_hash_t hash; + uint16_t match_full; + + for (old_group = 0; old_group < old_end_group; old_group++) + { + match_full = hm_match_full(map, old_group); + + while (match_full) + { + old_group_pos = trailing_zeroes(match_full); + + old_idx = hm_idx(old_group, old_group_pos); + hash = map->groups[old_group].hash[old_group_pos]; + + new_idx = hash.pos % new_map->size; + new_group = hm_group(new_idx); + new_group_pos = hm_group_pos(new_idx); + + hm_insert_at( + new_map, new_group, new_group_pos, hash, + map->groups[old_group].key[old_group_pos], + map->values[old_idx]); + + match_full = blsr_u32(match_full); + } + } + } + + hm_erase(map); + free(map); + return new_map; +} + +static void hm_insert_at(hm_map_t *map, size_t group, hm_metadata_t group_pos, + hm_hash_t hash, hm_key_t *key, hm_value_t *value) +{ + size_t match_idx, match_idx_emp, match_idx_del; + + if (hm_match_metadata_from( + map, HM_EMPTY, &hash, group, group_pos, &match_idx_emp) | + hm_match_metadata_from( + map, HM_DELETED, &hash, group, group_pos, &match_idx_del)) + { + match_idx = (match_idx_emp < match_idx_del) + ? 
match_idx_emp + : match_idx_del; + group_pos = hm_group_pos(match_idx); + + ((hm_metadata_t *)&(map->groups[group]._ctrl))[group_pos] = hash.meta; + map->groups[group].key[group_pos] = key; + map->groups[group].hash[group_pos] = hash; + map->values[match_idx] = value; + map->items++; + + if (match_idx > map->sentinel) + map->sentinel = match_idx; + return; + } + + size_t end_group = hm_last_group(map); + + while (true) + { + group = (group + 1) % end_group; + + if (hm_match_metadata(map, HM_EMPTY, group, &match_idx_emp) | hm_match_metadata(map, HM_DELETED, group, &match_idx_del)) + { + match_idx = (match_idx_emp < match_idx_del) + ? match_idx_emp + : match_idx_del; + group_pos = hm_group_pos(match_idx); + + ((hm_metadata_t *)&(map->groups[group]._ctrl))[group_pos] = hash.meta; + map->groups[group].hash[group_pos] = hash; + map->groups[group].key[group_pos] = key; + map->values[match_idx] = value; + map->items++; + + if (match_idx > map->sentinel) + map->sentinel = match_idx; + return; + } + } +} + +/* Inserts key/value, resizing first when the load factor is reached or the map has no slots (*map_ref may be replaced). If an equal key is already stored, the stored key is kept, the caller's duplicate key and the previous value are freed, and the slot is pointed at the new value. */ +void hm_insert(hm_map_t **map_ref, hm_key_t *key, hm_value_t *value) +{ + if (hm_should_resize(*map_ref) || (*map_ref)->size == 0) + *map_ref = hm_resize((*map_ref)); + + size_t match_idx; + hm_hash_t hash = hm_hash((*map_ref), key); + size_t idx = hash.pos % (*map_ref)->size; + size_t group = hm_group(idx); + hm_metadata_t group_pos = hm_group_pos(idx); + + hm_value_t *match_value = hm_find_hash( + (*map_ref), &hash, key, group, group_pos, &match_idx); + if (match_value) + { + hm_key_t *stored_key = (*map_ref)->groups[hm_group(match_idx)].key[hm_group_pos(match_idx)]; + + /* Never free a pointer the map still references. */ + if (key != stored_key) + free(key); + if (match_value != value) + free(match_value); + /* Store the new value so the slot no longer points at freed memory. */ + (*map_ref)->values[match_idx] = value; + return; + } + hm_insert_at((*map_ref), group, group_pos, hash, key, value); +} + +hm_value_t *hm_remove(hm_map_t *map, hm_key_t *key, hm_key_t **match_key_ref) +{ + hm_hash_t hash = hm_hash(map, key); + size_t idx = hash.pos % map->size; + size_t group = hm_group(idx); + hm_metadata_t group_pos = hm_group_pos(idx); + size_t match_idx; + + hm_value_t *match_value = hm_find_hash( + map, &hash, key, group, group_pos, &match_idx); + if (match_value) + { + group = 
hm_group(match_idx); + group_pos = hm_group_pos(match_idx); + + ((hm_metadata_t *)&(map->groups[group]._ctrl))[group_pos] = HM_DELETED; + *match_key_ref = map->groups[group].key[group_pos]; + map->items--; + + /* sentinel is a size_t: decrementing it at 0 would wrap to SIZE_MAX and break every bound derived from it */ + if (match_idx == map->sentinel && map->sentinel > 0) + map->sentinel--; + return match_value; + } + return NULL; +} + +/* Visits slot *idx and advances *idx. Returns false once *idx is past the sentinel or when the map has no slots. For an EMPTY or DELETED slot both *key_ref and *value_ref are set to NULL, so callers must skip NULL keys. */ +bool hm_iterate(hm_map_t *map, size_t *idx, + hm_key_t **key_ref, hm_value_t **value_ref) +{ + /* size == 0 means groups is NULL, so there is nothing to read */ + if (map->size == 0 || *idx > map->sentinel) + return false; + + size_t group = hm_group(*idx); + hm_metadata_t group_pos = hm_group_pos(*idx); + + if (((hm_metadata_t *)&(map->groups[group]._ctrl))[group_pos] < 0) + { + *key_ref = NULL; + *value_ref = NULL; + } + else + { + *key_ref = map->groups[group].key[group_pos]; + *value_ref = map->values[*idx]; + } + (*idx)++; + return true; +} + +/* + * DJB2 Hash Function. + */ +size_t hash_djb2(char *str) +{ + size_t hash = 5381; + int c; + + while (c = *str++) + hash = ((hash << 5) + hash) + c; + return hash; +} + +bool str_equals(char *val1, char *val2) +{ + return !strcmp(val1, val2); +} diff --git a/c/swissmap/src/main.c b/c/swissmap/src/main.c new file mode 100644 index 000000000..de78b0c68 --- /dev/null +++ b/c/swissmap/src/main.c @@ -0,0 +1,102 @@ +#include "main.h" + +int main() +{ + const size_t cnt = 500000; + char *tmp, *keys[cnt], *values[cnt]; + const int key_len = 11, value_len = 3; + + for (uint64_t i = 0; i < cnt; i++) + { + keys[i] = rand_string(key_len); + values[i] = rand_string(value_len); + } + + double time_spent; + const size_t cap = 0; + size_t i; + clock_t begin, end; + hm_map_t *map = hm_new(cap, &hash_djb2, &str_equals); + printf("Swissmap Benchmarks (Key: String):\n"); + + begin = clock(); + for (i = 0; i < cnt; i++) + hm_insert(&map, keys[i], values[i]); + end = clock(); + time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + bench_print("set", cnt, time_spent); + + size_t idx; + begin = clock(); + for (i = 0; i < cnt; i++) + if (!hm_find(map, keys[i], &idx)) + printf("-----\n"); + end = clock(); + time_spent = 
(double)(end - begin) / CLOCKS_PER_SEC; + bench_print("get", cnt, time_spent); + + hm_key_t *match_key; + size_t n_removed = 0; + begin = clock(); + for (i = 0; i < cnt; i++) + { + hm_value_t *match_value = hm_remove(map, keys[i], &match_key); + if (match_value) + { + free(match_key); + free(match_value); + n_removed++; + } + } + end = clock(); + time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + bench_print("del", cnt, time_spent); + + /* every counter printed below is a size_t, hence %zu */ + printf("\n\nAfter removing %zu values:\n", n_removed); + hm_key_t *key_it; + hm_value_t *value_it; + size_t idx_it = 0, manual_cnt = 0; + while (hm_iterate(map, &idx_it, &key_it, &value_it)) + { + if (key_it) + { + printf("key = %s removed at idx %zu\n", key_it, idx_it - 1); + free(key_it); + free(value_it); + manual_cnt++; + } + } + printf("manual count of items = %zu\n", manual_cnt); + printf("items = %zu\n", map->items); + printf("size = %zu\n", map->size); + printf("sentinel = %zu\n", map->sentinel); + + hm_clear(map); + hm_erase(map); + free(map); +} + +char *rand_string(int length) +{ + static int seed = 25011984; + char *string = STR_CHARS; + size_t string_len = strlen(string); + char *rand_string = NULL; + + srand(time(NULL) * length + ++seed); + rand_string = malloc(sizeof(char) * (length + 1)); + for (int n = 0; n < length; n++) + { + short key = rand() % string_len; + rand_string[n] = string[key]; + } + rand_string[length] = '\0'; + return rand_string; +} + +/* Prints one benchmark line: the label, the iteration count, total seconds, ns per iteration and iterations per second. */ +void bench_print(char *iter, size_t cnt, double time_spent) +{ + const size_t ns = 1000000000; + /* cnt is a size_t, so it needs %zu rather than %d */ + printf("%s\t%zu iters -> %lf seconds, %.0lf ns/iter %.0lf iter/sec\n", + iter, cnt, time_spent, time_spent * ns / cnt, cnt / time_spent); +} diff --git a/src/core/accounts_db.zig b/src/core/accounts_db.zig index ac8a0ea3e..78dc0d6db 100644 --- a/src/core/accounts_db.zig +++ b/src/core/accounts_db.zig @@ -43,6 +43,11 @@ const unpackZstdTarBall = @import("./snapshots.zig").unpackZstdTarBall; const Logger = @import("../trace/log.zig").Logger; const Level = 
@import("../trace/level.zig").Level; +// We can import c.hm_map_t here +// const c = @cImport({ +// @cInclude("hashmap.h"); +// }); + pub const MERKLE_FANOUT: usize = 16; pub const ACCOUNT_INDEX_BINS: usize = 8192;