From 639db215f7111882fa16457c08fde46f39fcc31f Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Thu, 2 Jan 2025 16:02:12 -0500 Subject: [PATCH] deps: update simdutf to 5.7.2 PR-URL: https://github.com/nodejs/node/pull/56388 Reviewed-By: Luigi Pinca Reviewed-By: Rafael Gonzaga Reviewed-By: James M Snell --- deps/simdutf/simdutf.cpp | 79 ++++++++++++++++++++++++++-------------- deps/simdutf/simdutf.h | 6 +-- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index f1dcbe488015e6..12a2f494e0a7aa 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-12-17 14:54:59 -0500. Do not edit! */ +/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */ /* begin file src/simdutf.cpp */ #include "simdutf.h" // We include base64_tables once. @@ -697,6 +697,15 @@ static_assert(to_base64_url_value[uint8_t('_')] == 63, #include #include +static_assert(sizeof(uint8_t) == sizeof(char), + "simdutf requires that uint8_t be a char"); +static_assert(sizeof(uint16_t) == sizeof(char16_t), + "simdutf requires that char16_t be 16 bits"); +static_assert(sizeof(uint32_t) == sizeof(char32_t), + "simdutf requires that char32_t be 32 bits"); +// next line is redundant, but it is kept to catch defective systems. +static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes"); + // Useful for debugging purposes namespace simdutf { namespace { @@ -9746,24 +9755,23 @@ inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) { } template -inline simdutf_warn_unused bool validate(const char16_t *buf, +inline simdutf_warn_unused bool validate(const char16_t *data, size_t len) noexcept { - const uint16_t *data = reinterpret_cast(buf); uint64_t pos = 0; while (pos < len) { - uint16_t word = + char16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; if ((word & 0xF800) == 0xD800) { if (pos + 1 >= len) { return false; } - uint16_t diff = uint16_t(word - 0xD800); + char16_t diff = char16_t(word - 0xD800); if (diff > 0x3FF) { return false; } - uint16_t next_word = + char16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); + char16_t diff2 = char16_t(next_word - 0xDC00); if (diff2 > 0x3FF) { return false; } @@ -9776,24 +9784,23 @@ inline simdutf_warn_unused bool validate(const char16_t *buf, } template -inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, +inline simdutf_warn_unused result validate_with_errors(const char16_t *data, size_t len) noexcept { - const uint16_t *data = reinterpret_cast(buf); size_t pos = 0; while (pos < len) { - uint16_t word = + char16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos]; if ((word & 0xF800) == 0xD800) { if (pos + 1 >= len) { return result(error_code::SURROGATE, pos); } - uint16_t diff = uint16_t(word - 0xD800); + char16_t diff = char16_t(word - 0xD800); if (diff > 0x3FF) { return result(error_code::SURROGATE, pos); } - uint16_t next_word = + char16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); + char16_t diff2 = uint16_t(next_word - 0xDC00); if (diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); } @@ -9806,24 +9813,22 @@ inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, } template -inline size_t count_code_points(const char16_t *buf, size_t len) { +inline size_t count_code_points(const char16_t *p, size_t len) { // We are not BOM aware. - const uint16_t *p = reinterpret_cast(buf); size_t counter{0}; for (size_t i = 0; i < len; i++) { - uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; + char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; counter += ((word & 0xFC00) != 0xDC00); } return counter; } template -inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) { +inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) { // We are not BOM aware. - const uint16_t *p = reinterpret_cast(buf); size_t counter{0}; for (size_t i = 0; i < len; i++) { - uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; + char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; counter++; // ASCII counter += static_cast( word > @@ -9835,12 +9840,11 @@ inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) { } template -inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) { +inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) { // We are not BOM aware. - const uint16_t *p = reinterpret_cast(buf); size_t counter{0}; for (size_t i = 0; i < len; i++) { - uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; + char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i]; counter += ((word & 0xFC00) != 0xDC00); } return counter; @@ -9848,12 +9852,10 @@ inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) { inline size_t latin1_length_from_utf16(size_t len) { return len; } -simdutf_really_inline void change_endianness_utf16(const char16_t *in, - size_t size, char16_t *out) { - const uint16_t *input = reinterpret_cast(in); - uint16_t *output = reinterpret_cast(out); +simdutf_really_inline void +change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { for (size_t i = 0; i < size; i++) { - *output++ = uint16_t(input[i] >> 8 | input[i] << 8); + *output++ = char16_t(input[i] >> 8 | input[i] << 8); } } @@ -21042,6 +21044,9 @@ struct validating_transcoder { uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in // this case, we also have ASCII to account for. + if (utf8_continuation_mask & 1) { + return 0; // error + } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -26717,6 +26722,14 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, } if (!ignore_garbage && equalsigns > 0) { + if (last_chunk_options == last_chunk_handling_options::strict) { + return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), + size_t(dst - dstinit)}; + } + if (last_chunk_options == + last_chunk_handling_options::stop_before_partial) { + return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; + } if ((size_t(dst - dstinit) % 3 == 0) || ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; @@ -33161,6 +33174,9 @@ struct validating_transcoder { uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in // this case, we also have ASCII to account for. + if (utf8_continuation_mask & 1) { + return 0; // error + } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -43013,6 +43029,9 @@ struct validating_transcoder { uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in // this case, we also have ASCII to account for. + if (utf8_continuation_mask & 1) { + return 0; // error + } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -48110,6 +48129,9 @@ struct validating_transcoder { uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in // this case, we also have ASCII to account for. + if (utf8_continuation_mask & 1) { + return 0; // error + } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly @@ -54454,6 +54476,9 @@ struct validating_transcoder { uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in // this case, we also have ASCII to account for. + if (utf8_continuation_mask & 1) { + return 0; // error + } uint64_t utf8_leading_mask = ~utf8_continuation_mask; uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; // We process in blocks of up to 12 bytes except possibly diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index 383687215b010c..9a4b4580da91a1 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-12-17 14:54:59 -0500. Do not edit! */ +/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -675,7 +675,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "5.7.0" +#define SIMDUTF_VERSION "5.7.2" namespace simdutf { enum { @@ -690,7 +690,7 @@ enum { /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 0 + SIMDUTF_VERSION_REVISION = 2 }; } // namespace simdutf