From 9efa083c346ebce162956b6deb6675c7cd36e377 Mon Sep 17 00:00:00 2001 From: Hacl Bot Date: Tue, 20 Aug 2024 16:51:06 +0000 Subject: [PATCH] [CI] update code --- include/Hacl_Bignum32.h | 382 +++--- include/Hacl_Hash_Blake2b.h | 43 +- include/Hacl_Hash_Blake2b_Simd256.h | 98 +- include/Hacl_Hash_Blake2s.h | 102 +- include/Hacl_Hash_Blake2s_Simd128.h | 98 +- include/Hacl_Hash_SHA3.h | 6 +- include/Hacl_Hash_SHA3_Simd256.h | 12 +- include/internal/Hacl_Bignum_Base.h | 50 +- include/internal/Hacl_Bignum_K256.h | 78 +- include/internal/Hacl_Frodo_KEM.h | 2 +- include/internal/Hacl_Hash_Blake2b.h | 16 +- include/internal/Hacl_Hash_Blake2b_Simd256.h | 1 + include/internal/Hacl_Hash_Blake2s.h | 1 + include/internal/Hacl_Hash_Blake2s_Simd128.h | 1 + include/msvc/Hacl_Bignum32.h | 382 +++--- include/msvc/Hacl_Hash_Blake2b.h | 43 +- include/msvc/Hacl_Hash_Blake2b_Simd256.h | 98 +- include/msvc/Hacl_Hash_Blake2s.h | 102 +- include/msvc/Hacl_Hash_Blake2s_Simd128.h | 98 +- include/msvc/Hacl_Hash_SHA3.h | 6 +- include/msvc/Hacl_Hash_SHA3_Simd256.h | 12 +- include/msvc/internal/Hacl_Hash_Blake2b.h | 16 +- .../msvc/internal/Hacl_Hash_Blake2b_Simd256.h | 1 + include/msvc/internal/Hacl_Hash_Blake2s.h | 1 + .../msvc/internal/Hacl_Hash_Blake2s_Simd128.h | 1 + karamel/include/krml/internal/target.h | 20 + ocaml/ctypes.depend | 10 +- .../lib/Hacl_Hash_Blake2b_Simd256_bindings.ml | 4 +- ocaml/lib/Hacl_Hash_Blake2b_bindings.ml | 30 +- .../lib/Hacl_Hash_Blake2s_Simd128_bindings.ml | 4 +- ocaml/lib/Hacl_Hash_Blake2s_bindings.ml | 22 +- src/EverCrypt_HMAC.c | 32 +- src/EverCrypt_Hash.c | 209 ++-- src/Hacl_AEAD_Chacha20Poly1305.c | 6 +- src/Hacl_AEAD_Chacha20Poly1305_Simd128.c | 6 +- src/Hacl_AEAD_Chacha20Poly1305_Simd256.c | 6 +- src/Hacl_Bignum.c | 396 +++++-- src/Hacl_Bignum256.c | 160 ++- src/Hacl_Bignum256_32.c | 160 ++- src/Hacl_Bignum32.c | 460 +++++--- src/Hacl_Bignum4096.c | 144 ++- src/Hacl_Bignum4096_32.c | 144 ++- src/Hacl_Bignum64.c | 78 +- src/Hacl_Chacha20.c | 22 +- src/Hacl_Chacha20_Vec128.c | 12 +- src/Hacl_Chacha20_Vec256.c | 12 +- src/Hacl_Chacha20_Vec32.c | 12 +- src/Hacl_Curve25519_51.c | 192 +-- src/Hacl_Curve25519_64.c | 190 ++- src/Hacl_EC_K256.c | 2 +- src/Hacl_Ed25519.c | 215 +++- src/Hacl_FFDHE.c | 4 +- src/Hacl_Frodo1344.c | 2 +- src/Hacl_Frodo64.c | 2 +- src/Hacl_Frodo640.c | 2 +- src/Hacl_Frodo976.c | 2 +- src/Hacl_GenericField32.c | 134 ++- src/Hacl_GenericField64.c | 134 ++- src/Hacl_HMAC.c | 32 +- src/Hacl_HMAC_Blake2b_256.c | 5 +- src/Hacl_HMAC_Blake2s_128.c | 6 +- src/Hacl_Hash_Blake2b.c | 178 ++- src/Hacl_Hash_Blake2b_Simd256.c | 247 ++-- src/Hacl_Hash_Blake2s.c | 259 ++-- src/Hacl_Hash_Blake2s_Simd128.c | 248 ++-- src/Hacl_Hash_MD5.c | 106 +- src/Hacl_Hash_SHA1.c | 106 +- src/Hacl_Hash_SHA2.c | 262 ++-- src/Hacl_Hash_SHA3.c | 6 +- src/Hacl_Hash_SHA3_Simd256.c | 12 +- src/Hacl_K256_ECDSA.c | 483 ++++++-- src/Hacl_MAC_Poly1305.c | 120 +- src/Hacl_MAC_Poly1305_Simd128.c | 120 +- src/Hacl_MAC_Poly1305_Simd256.c | 120 +- src/Hacl_NaCl.c | 4 +- src/Hacl_P256.c | 1050 +++++++++++++---- src/Hacl_RSAPSS.c | 6 +- src/Hacl_SHA2_Vec128.c | 8 +- src/Hacl_SHA2_Vec256.c | 16 +- src/Hacl_Salsa20.c | 34 +- src/msvc/EverCrypt_HMAC.c | 14 +- src/msvc/EverCrypt_Hash.c | 6 +- src/msvc/Hacl_Bignum32.c | 382 +++--- src/msvc/Hacl_HMAC.c | 14 +- src/msvc/Hacl_HMAC_Blake2b_256.c | 3 + src/msvc/Hacl_HMAC_Blake2s_128.c | 4 +- src/msvc/Hacl_Hash_Blake2b.c | 178 ++- src/msvc/Hacl_Hash_Blake2b_Simd256.c | 247 ++-- src/msvc/Hacl_Hash_Blake2s.c | 259 ++-- src/msvc/Hacl_Hash_Blake2s_Simd128.c | 248 ++-- 
src/msvc/Hacl_Hash_SHA3.c | 6 +- src/msvc/Hacl_Hash_SHA3_Simd256.c | 12 +- src/wasm/EverCrypt_Hash.wasm | Bin 58084 -> 58090 bytes src/wasm/Hacl_Bignum.wasm | Bin 74661 -> 74661 bytes src/wasm/Hacl_Bignum32.wasm | Bin 13286 -> 13286 bytes src/wasm/Hacl_Bignum_Base.wasm | Bin 24692 -> 24692 bytes src/wasm/Hacl_GenericField64.wasm | Bin 11718 -> 11718 bytes src/wasm/Hacl_HKDF_Blake2s_128.wasm | Bin 1392 -> 1392 bytes src/wasm/Hacl_HMAC.wasm | Bin 28160 -> 28174 bytes src/wasm/Hacl_Hash_Blake2b.wasm | Bin 22942 -> 23807 bytes src/wasm/Hacl_Hash_Blake2b_Simd256.wasm | Bin 11362 -> 11946 bytes src/wasm/Hacl_Hash_Blake2s.wasm | Bin 21136 -> 21925 bytes src/wasm/Hacl_Hash_Blake2s_Simd128.wasm | Bin 10194 -> 10776 bytes src/wasm/INFO.txt | 4 +- src/wasm/layouts.json | 2 +- 105 files changed, 5986 insertions(+), 3299 deletions(-) diff --git a/include/Hacl_Bignum32.h b/include/Hacl_Bignum32.h index 84a839a9..709f22d9 100644 --- a/include/Hacl_Bignum32.h +++ b/include/Hacl_Bignum32.h @@ -56,9 +56,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); @@ -67,82 +76,134 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. 
- - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -157,22 +218,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. 
- • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -187,18 +256,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); @@ -212,15 +286,16 @@ Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n); @@ -228,16 +303,18 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. */ void Hacl_Bignum32_mod_precomp( @@ -249,21 +326,25 @@ Hacl_Bignum32_mod_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -277,21 +358,25 @@ Hacl_Bignum32_mod_exp_vartime_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. 
- - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -305,14 +390,17 @@ Hacl_Bignum32_mod_exp_consttime_precomp( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -330,42 +418,48 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b); /** Load a little-endian bignum from memory. 
- The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b); /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res); /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); @@ -378,14 +472,22 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b); /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if a = b, otherwise, `0`. 
*/ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b); diff --git a/include/Hacl_Hash_Blake2b.h b/include/Hacl_Hash_Blake2b.h index 3403fc83..fcc2d5df 100644 --- a/include/Hacl_Hash_Blake2b.h +++ b/include/Hacl_Hash_Blake2b.h @@ -53,6 +53,24 @@ typedef struct Hacl_Hash_Blake2b_blake2_params_s } Hacl_Hash_Blake2b_blake2_params; +typedef struct Hacl_Hash_Blake2b_index_s +{ + uint8_t key_length; + uint8_t digest_length; + bool last_node; +} +Hacl_Hash_Blake2b_index; + +#define HACL_HASH_BLAKE2B_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_PERSONAL_BYTES (16U) + typedef struct K____uint64_t___uint64_t__s { uint64_t *fst; @@ -64,7 +82,8 @@ typedef struct Hacl_Hash_Blake2b_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint64_t___uint64_t_ thd; + bool thd; + K____uint64_t___uint64_t_ f3; } Hacl_Hash_Blake2b_block_state_t; @@ -92,7 +111,11 @@ The caller must satisfy the following requirements. */ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** Specialized allocation function that picks default values for all @@ -116,7 +139,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -159,10 +182,14 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s); /** Free state function when there is no key @@ -198,10 +225,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. 
*/ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_Blake2b_Simd256.h b/include/Hacl_Hash_Blake2b_Simd256.h index af309dc8..f1799e25 100644 --- a/include/Hacl_Hash_Blake2b_Simd256.h +++ b/include/Hacl_Hash_Blake2b_Simd256.h @@ -40,6 +40,16 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" +#define HACL_HASH_BLAKE2B_SIMD256_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_SIMD256_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_SIMD256_PERSONAL_BYTES (16U) + typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s { Lib_IntVector_Intrinsics_vec256 *fst; @@ -51,7 +61,8 @@ typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ thd; + bool thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ f3; } Hacl_Hash_Blake2b_Simd256_block_state_t; @@ -64,34 +75,54 @@ typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s Hacl_Hash_Blake2b_Simd256_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -101,21 +132,27 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -125,10 +162,19 @@ Hacl_Hash_Blake2b_Simd256_update( ); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s); /** Free state function when there is no key @@ -136,7 +182,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state); /** - Copying. 
The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state); @@ -161,8 +207,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_Blake2s.h b/include/Hacl_Hash_Blake2s.h index ac783473..870f1edc 100644 --- a/include/Hacl_Hash_Blake2s.h +++ b/include/Hacl_Hash_Blake2s.h @@ -38,6 +38,16 @@ extern "C" { #include "Hacl_Streaming_Types.h" #include "Hacl_Hash_Blake2b.h" +#define HACL_HASH_BLAKE2S_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_PERSONAL_BYTES (8U) + typedef struct K____uint32_t___uint32_t__s { uint32_t *fst; @@ -49,7 +59,8 @@ typedef struct Hacl_Hash_Blake2s_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint32_t___uint32_t_ thd; + bool thd; + K____uint32_t___uint32_t_ f3; } Hacl_Hash_Blake2s_block_state_t; @@ -62,30 +73,53 @@ typedef struct Hacl_Hash_Blake2s_state_t_s Hacl_Hash_Blake2s_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. 
+ */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -95,28 +129,44 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
*/ -void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s); /** Free state function when there is no key @@ -124,7 +174,7 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state); /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state); @@ -148,8 +198,14 @@ Hacl_Hash_Blake2s_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_Blake2s_Simd128.h b/include/Hacl_Hash_Blake2s_Simd128.h index d725ee86..2bae1c8e 100644 --- a/include/Hacl_Hash_Blake2s_Simd128.h +++ b/include/Hacl_Hash_Blake2s_Simd128.h @@ -39,6 +39,16 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" +#define HACL_HASH_BLAKE2S_SIMD128_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_SIMD128_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_SIMD128_PERSONAL_BYTES (8U) + typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s { Lib_IntVector_Intrinsics_vec128 *fst; @@ -50,7 +60,8 @@ typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ thd; + bool thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ f3; } Hacl_Hash_Blake2s_Simd128_block_state_t; @@ -63,34 +74,54 @@ typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s Hacl_Hash_Blake2s_Simd128_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. 
All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -100,21 +131,27 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -124,10 +161,19 @@ Hacl_Hash_Blake2s_Simd128_update( ); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. 
Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s); /** Free state function when there is no key @@ -135,7 +181,7 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state); /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state); @@ -160,8 +206,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_SHA3.h b/include/Hacl_Hash_SHA3.h index 8fb78fcd..18f23d8d 100644 --- a/include/Hacl_Hash_SHA3.h +++ b/include/Hacl_Hash_SHA3.h @@ -117,7 +117,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s); Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -131,14 +131,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. 
The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index 617e8e34..72162d43 100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -139,12 +139,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s); Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -161,15 +161,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. 
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -192,7 +192,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/include/internal/Hacl_Bignum_Base.h b/include/internal/Hacl_Bignum_Base.h index f2e282f4..4e0b35cb 100644 --- a/include/internal/Hacl_Bignum_Base.h +++ b/include/internal/Hacl_Bignum_Base.h @@ -72,9 +72,9 @@ Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t * memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res; os[i] = x; } } @@ -372,8 +372,8 @@ Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) memset(res, 0U, (aLen + aLen) * sizeof (uint32_t)); for (uint32_t i0 = 0U; i0 < aLen; i0++) { - uint32_t *ab = a; uint32_t a_j = a[i0]; + uint32_t *ab = a; uint32_t *res_j = res + i0; uint32_t c = 0U; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -400,7 +400,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) uint32_t r = c; res[i0 + i0] = r; } - uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t a_copy0[aLen + aLen]; + memset(a_copy0, 0U, (aLen + aLen) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t b_copy0[aLen + aLen]; + memset(b_copy0, 0U, (aLen + aLen) * sizeof (uint32_t)); + memcpy(a_copy0, res, (aLen + aLen) * sizeof (uint32_t)); + memcpy(b_copy0, res, (aLen + aLen) * sizeof (uint32_t)); + uint32_t r = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, a_copy0, b_copy0, res); + uint32_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); uint32_t tmp[aLen + aLen]; @@ -413,7 +422,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) tmp[2U * i] = lo; tmp[2U * i + 1U] = hi; } - uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, res, tmp, res); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t a_copy[aLen + aLen]; + memset(a_copy, 0U, (aLen + aLen) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t b_copy[aLen + aLen]; + memset(b_copy, 0U, (aLen + aLen) * sizeof (uint32_t)); + memcpy(a_copy, res, (aLen + aLen) * sizeof (uint32_t)); + memcpy(b_copy, tmp, (aLen + aLen) * sizeof (uint32_t)); + uint32_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, a_copy, b_copy, res); + uint32_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -423,8 +441,8 @@ Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) memset(res, 0U, (aLen + aLen) * sizeof (uint64_t)); for (uint32_t i0 = 0U; i0 < 
aLen; i0++) { - uint64_t *ab = a; uint64_t a_j = a[i0]; + uint64_t *ab = a; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -451,7 +469,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) uint64_t r = c; res[i0 + i0] = r; } - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t a_copy0[aLen + aLen]; + memset(a_copy0, 0U, (aLen + aLen) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t b_copy0[aLen + aLen]; + memset(b_copy0, 0U, (aLen + aLen) * sizeof (uint64_t)); + memcpy(a_copy0, res, (aLen + aLen) * sizeof (uint64_t)); + memcpy(b_copy0, res, (aLen + aLen) * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); uint64_t tmp[aLen + aLen]; @@ -464,7 +491,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) tmp[2U * i] = lo; tmp[2U * i + 1U] = hi; } - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, res, tmp, res); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t a_copy[aLen + aLen]; + memset(a_copy, 0U, (aLen + aLen) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t b_copy[aLen + aLen]; + memset(b_copy, 0U, (aLen + aLen) * sizeof (uint64_t)); + memcpy(a_copy, res, (aLen + aLen) * sizeof (uint64_t)); + memcpy(b_copy, tmp, (aLen + aLen) * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } diff --git a/include/internal/Hacl_Bignum_K256.h b/include/internal/Hacl_Bignum_K256.h index fe72fffe..8a66cf21 100644 --- a/include/internal/Hacl_Bignum_K256.h +++ b/include/internal/Hacl_Bignum_K256.h @@ -104,11 +104,11 @@ static inline void Hacl_K256_Field_load_felem(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = tmp; uint8_t *bj = b + i * 8U; uint64_t u = load64_be(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = tmp; os[i] = x;); uint64_t s0 = tmp[3U]; uint64_t s1 = tmp[2U]; @@ -589,7 +589,9 @@ static inline void Hacl_K256_Field_fnegate_conditional_vartime(uint64_t *f, bool f[2U] = f2; f[3U] = f3; f[4U] = f4; - Hacl_K256_Field_fnormalize(f, f); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, f, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(f, f_copy); return; } } @@ -598,7 +600,9 @@ static inline void Hacl_Impl_K256_Finv_fsquare_times_in_place(uint64_t *out, uin { for (uint32_t i = 0U; i < b; i++) { - Hacl_K256_Field_fsqr(out, out); + uint64_t x_copy[5U] = { 0U }; + memcpy(x_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsqr(out, x_copy); } } @@ -607,7 +611,9 @@ static inline void Hacl_Impl_K256_Finv_fsquare_times(uint64_t *out, uint64_t *a, memcpy(out, a, 5U * sizeof (uint64_t)); for (uint32_t i = 0U; i < b; i++) { - Hacl_K256_Field_fsqr(out, out); + uint64_t x_copy[5U] = { 0U }; + memcpy(x_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsqr(out, x_copy); } } @@ -618,29 +624,53 @@ static inline void Hacl_Impl_K256_Finv_fexp_223_23(uint64_t *out, uint64_t *x2, uint64_t x44[5U] = { 0U }; uint64_t x88[5U] = { 0U }; Hacl_Impl_K256_Finv_fsquare_times(x2, f, 1U); - Hacl_K256_Field_fmul(x2, x2, f); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, x2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x2, f1_copy, f); 
Hacl_Impl_K256_Finv_fsquare_times(x3, x2, 1U); - Hacl_K256_Field_fmul(x3, x3, f); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy0, f); Hacl_Impl_K256_Finv_fsquare_times(out, x3, 3U); - Hacl_K256_Field_fmul(out, out, x3); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy1, x3); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 3U); - Hacl_K256_Field_fmul(out, out, x3); + uint64_t f1_copy2[5U] = { 0U }; + memcpy(f1_copy2, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy2, x3); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 2U); - Hacl_K256_Field_fmul(out, out, x2); + uint64_t f1_copy3[5U] = { 0U }; + memcpy(f1_copy3, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy3, x2); Hacl_Impl_K256_Finv_fsquare_times(x22, out, 11U); - Hacl_K256_Field_fmul(x22, x22, out); + uint64_t f1_copy4[5U] = { 0U }; + memcpy(f1_copy4, x22, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x22, f1_copy4, out); Hacl_Impl_K256_Finv_fsquare_times(x44, x22, 22U); - Hacl_K256_Field_fmul(x44, x44, x22); + uint64_t f1_copy5[5U] = { 0U }; + memcpy(f1_copy5, x44, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x44, f1_copy5, x22); Hacl_Impl_K256_Finv_fsquare_times(x88, x44, 44U); - Hacl_K256_Field_fmul(x88, x88, x44); + uint64_t f1_copy6[5U] = { 0U }; + memcpy(f1_copy6, x88, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x88, f1_copy6, x44); Hacl_Impl_K256_Finv_fsquare_times(out, x88, 88U); - Hacl_K256_Field_fmul(out, out, x88); + uint64_t f1_copy7[5U] = { 0U }; + memcpy(f1_copy7, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy7, x88); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 44U); - Hacl_K256_Field_fmul(out, out, x44); + uint64_t f1_copy8[5U] = { 0U }; + memcpy(f1_copy8, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy8, x44); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 3U); - Hacl_K256_Field_fmul(out, out, x3); + uint64_t f1_copy9[5U] = { 0U }; + memcpy(f1_copy9, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy9, x3); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 23U); - Hacl_K256_Field_fmul(out, out, x22); + uint64_t f1_copy10[5U] = { 0U }; + memcpy(f1_copy10, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy10, x22); } static inline void Hacl_Impl_K256_Finv_finv(uint64_t *out, uint64_t *f) @@ -648,11 +678,17 @@ static inline void Hacl_Impl_K256_Finv_finv(uint64_t *out, uint64_t *f) uint64_t x2[5U] = { 0U }; Hacl_Impl_K256_Finv_fexp_223_23(out, x2, f); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 5U); - Hacl_K256_Field_fmul(out, out, f); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy, f); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 3U); - Hacl_K256_Field_fmul(out, out, x2); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy0, x2); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 2U); - Hacl_K256_Field_fmul(out, out, f); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy1, f); } static inline void Hacl_Impl_K256_Finv_fsqrt(uint64_t *out, uint64_t *f) @@ -660,7 +696,9 @@ static inline void Hacl_Impl_K256_Finv_fsqrt(uint64_t *out, uint64_t *f) uint64_t x2[5U] = { 0U }; Hacl_Impl_K256_Finv_fexp_223_23(out, x2, f); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 6U); - 
Hacl_K256_Field_fmul(out, out, x2); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy, x2); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 2U); } diff --git a/include/internal/Hacl_Frodo_KEM.h b/include/internal/Hacl_Frodo_KEM.h index 34b1816a..78593991 100644 --- a/include/internal/Hacl_Frodo_KEM.h +++ b/include/internal/Hacl_Frodo_KEM.h @@ -182,9 +182,9 @@ Hacl_Impl_Matrix_matrix_from_lbytes(uint32_t n1, uint32_t n2, uint8_t *b, uint16 { for (uint32_t i = 0U; i < n1 * n2; i++) { - uint16_t *os = res; uint16_t u = load16_le(b + 2U * i); uint16_t x = u; + uint16_t *os = res; os[i] = x; } } diff --git a/include/internal/Hacl_Hash_Blake2b.h b/include/internal/Hacl_Hash_Blake2b.h index 6928d205..2dad4b01 100644 --- a/include/internal/Hacl_Hash_Blake2b.h +++ b/include/internal/Hacl_Hash_Blake2b.h @@ -38,12 +38,12 @@ extern "C" { #include "internal/Hacl_Impl_Blake2_Constants.h" #include "../Hacl_Hash_Blake2b.h" -typedef struct Hacl_Hash_Blake2b_index_s +typedef struct Hacl_Hash_Blake2b_params_and_key_s { - uint8_t key_length; - uint8_t digest_length; + Hacl_Hash_Blake2b_blake2_params *fst; + uint8_t *snd; } -Hacl_Hash_Blake2b_index; +Hacl_Hash_Blake2b_params_and_key; void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn); @@ -62,6 +62,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -69,13 +70,6 @@ Hacl_Hash_Blake2b_update_last( void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash); -typedef struct K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t__s -{ - Hacl_Hash_Blake2b_blake2_params *fst; - uint8_t *snd; -} -K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_; - #if defined(__cplusplus) } #endif diff --git a/include/internal/Hacl_Hash_Blake2b_Simd256.h b/include/internal/Hacl_Hash_Blake2b_Simd256.h index 4dd986b2..04b091fc 100644 --- a/include/internal/Hacl_Hash_Blake2b_Simd256.h +++ b/include/internal/Hacl_Hash_Blake2b_Simd256.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d diff --git a/include/internal/Hacl_Hash_Blake2s.h b/include/internal/Hacl_Hash_Blake2s.h index eccd92de..279c472e 100644 --- a/include/internal/Hacl_Hash_Blake2s.h +++ b/include/internal/Hacl_Hash_Blake2s.h @@ -56,6 +56,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/include/internal/Hacl_Hash_Blake2s_Simd128.h b/include/internal/Hacl_Hash_Blake2s_Simd128.h index 2c422949..77505dc2 100644 --- a/include/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/include/internal/Hacl_Hash_Blake2s_Simd128.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/include/msvc/Hacl_Bignum32.h b/include/msvc/Hacl_Bignum32.h index 84a839a9..709f22d9 100644 --- a/include/msvc/Hacl_Bignum32.h +++ b/include/msvc/Hacl_Bignum32.h @@ -56,9 +56,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. 
uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); @@ -67,82 +76,134 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. 
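As a concrete illustration of the carry convention documented above (not part of the patch itself): `res` receives the low `len` limbs of the sum and the carry comes back as the return value; limbs are stored least-significant first.

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: 4 limbs = 128 bits; a = 2^128 - 1, b = 1, so res wraps to 0
 * and the returned carry is 1. a, b and res may alias exactly but must
 * not partially overlap. */
static void bignum32_add_carry_sketch(void)
{
  uint32_t a[4U] = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU };
  uint32_t b[4U] = { 1U, 0U, 0U, 0U };
  uint32_t res[4U] = { 0U };
  uint32_t carry = Hacl_Bignum32_add(4U, a, b, res);  /* carry == 1, res == {0,0,0,0} */
  (void)carry;
}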
+ @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. 
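Since `mul` produces a double-length product and `mod` reduces a double-length value by an odd modulus, the two compose into a simple modular multiplication. The helper name and the use of a variable-length array below are inventions of this sketch, not part of the library.

#include <stdbool.h>
#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: res = (a * b) mod n for len-limb inputs. mod returns false
 * if n is even or n <= 1. a, b, n, res and tmp must satisfy the
 * disjointness rules stated above. */
static bool bignum32_mulmod_sketch(uint32_t len, uint32_t *n, uint32_t *a,
                                   uint32_t *b, uint32_t *res)
{
  uint32_t tmp[2U * len];                 /* C99 VLA holding the 2*len-limb product */
  Hacl_Bignum32_mul(len, a, b, tmp);
  return Hacl_Bignum32_mod(len, n, tmp, res);
}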
- - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -157,22 +218,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -187,18 +256,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); @@ -212,15 +286,16 @@ Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n); @@ -228,16 +303,18 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. 
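A small worked instance of `Hacl_Bignum32_mod_inv_prime_vartime`, declared a little above, may help; it is purely illustrative.

#include <stdbool.h>
#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: one-limb inverse. n = 7 is an odd prime and a = 3, so the
 * preconditions hold and res becomes 5, since 3 * 5 = 15 = 1 (mod 7). */
static void bignum32_mod_inv_sketch(void)
{
  uint32_t n[1U] = { 7U };
  uint32_t a[1U] = { 3U };
  uint32_t res[1U] = { 0U };
  bool ok = Hacl_Bignum32_mod_inv_prime_vartime(1U, n, a, res);  /* ok == true, res[0] == 5 */
  (void)ok;
}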
*/ void Hacl_Bignum32_mod_precomp( @@ -249,21 +326,25 @@ Hacl_Bignum32_mod_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -277,21 +358,25 @@ Hacl_Bignum32_mod_exp_vartime_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. 
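The precomputation workflow documented here is: initialize a context once for an odd modulus, run one or more `*_precomp` operations against it, then free it. The parameter list of `Hacl_Bignum32_mod_exp_vartime_precomp` is not printed in this hunk, so the sketch below assumes the `(k, a, bBits, b, res)` order used elsewhere in the library.

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: one Montgomery context reused for an exponentiation mod an
 * odd n (len limbs). Requires a < n; treating b as a len-limb exponent,
 * 32*len is a safe bBits bound. */
static void bignum32_montgomery_sketch(uint32_t len, uint32_t *n, uint32_t *a,
                                       uint32_t *b, uint32_t *res)
{
  Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k = Hacl_Bignum32_mont_ctx_init(len, n);
  Hacl_Bignum32_mod_exp_vartime_precomp(k, a, 32U * len, b, res);  /* argument order assumed */
  Hacl_Bignum32_mont_ctx_free(k);
}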
+ @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -305,14 +390,17 @@ Hacl_Bignum32_mod_exp_consttime_precomp( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -330,42 +418,48 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b); /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b); /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. 
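The byte-level loaders above pair naturally with the serializers declared next; a round-trip sketch (the helper and its error handling are illustrative only):

#include <stdint.h>
#include <stdlib.h>
#include "Hacl_Bignum32.h"

/* Sketch: parse a big-endian byte string into a heap-allocated bignum,
 * write it back out, and release it with free(3). */
static int bignum32_bytes_roundtrip_sketch(uint8_t *bytes, uint32_t len, uint8_t *out)
{
  uint32_t *bn = Hacl_Bignum32_new_bn_from_bytes_be(len, bytes);
  if (bn == NULL)
    return 1;                                  /* allocation failed or size limit exceeded */
  Hacl_Bignum32_bn_to_bytes_be(len, bn, out);  /* out must hold len bytes */
  free(bn);
  return 0;
}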
*/ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res); /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); @@ -378,14 +472,22 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b); /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if a = b, otherwise, `0`. */ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b); diff --git a/include/msvc/Hacl_Hash_Blake2b.h b/include/msvc/Hacl_Hash_Blake2b.h index 3403fc83..fcc2d5df 100644 --- a/include/msvc/Hacl_Hash_Blake2b.h +++ b/include/msvc/Hacl_Hash_Blake2b.h @@ -53,6 +53,24 @@ typedef struct Hacl_Hash_Blake2b_blake2_params_s } Hacl_Hash_Blake2b_blake2_params; +typedef struct Hacl_Hash_Blake2b_index_s +{ + uint8_t key_length; + uint8_t digest_length; + bool last_node; +} +Hacl_Hash_Blake2b_index; + +#define HACL_HASH_BLAKE2B_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_PERSONAL_BYTES (16U) + typedef struct K____uint64_t___uint64_t__s { uint64_t *fst; @@ -64,7 +82,8 @@ typedef struct Hacl_Hash_Blake2b_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint64_t___uint64_t_ thd; + bool thd; + K____uint64_t___uint64_t_ f3; } Hacl_Hash_Blake2b_block_state_t; @@ -92,7 +111,11 @@ The caller must satisfy the following requirements. */ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** Specialized allocation function that picks default values for all @@ -116,7 +139,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -159,10 +182,14 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. 
Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s); /** Free state function when there is no key @@ -198,10 +225,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. */ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_Blake2b_Simd256.h b/include/msvc/Hacl_Hash_Blake2b_Simd256.h index af309dc8..f1799e25 100644 --- a/include/msvc/Hacl_Hash_Blake2b_Simd256.h +++ b/include/msvc/Hacl_Hash_Blake2b_Simd256.h @@ -40,6 +40,16 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" +#define HACL_HASH_BLAKE2B_SIMD256_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_SIMD256_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_SIMD256_PERSONAL_BYTES (16U) + typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s { Lib_IntVector_Intrinsics_vec256 *fst; @@ -51,7 +61,8 @@ typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ thd; + bool thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ f3; } Hacl_Hash_Blake2b_Simd256_block_state_t; @@ -64,34 +75,54 @@ typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s Hacl_Hash_Blake2b_Simd256_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. 
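Both the scalar and the Simd256 Blake2b digest functions in this patch now return the digest length. A minimal sketch using the scalar names from the header above; the matching `Hacl_Hash_Blake2b_free` is assumed, and with no `update` calls this is simply the digest of the empty message.

#include <stdint.h>
#include "Hacl_Hash_Blake2b.h"

/* Sketch: with the default-parameter malloc, digest writes the default
 * 64-byte BLAKE2b output and returns that length. */
static void blake2b_digest_length_sketch(void)
{
  uint8_t out[64U];                                      /* HACL_HASH_BLAKE2B_OUT_BYTES */
  Hacl_Hash_Blake2b_state_t *st = Hacl_Hash_Blake2b_malloc();
  uint8_t written = Hacl_Hash_Blake2b_digest(st, out);   /* written == 64 here */
  Hacl_Hash_Blake2b_free(st);                            /* name assumed */
  (void)written;
}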
+ */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -101,21 +132,27 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -125,10 +162,19 @@ Hacl_Hash_Blake2b_Simd256_update( ); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s); /** Free state function when there is no key @@ -136,7 +182,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state); /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state); @@ -161,8 +207,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_Blake2s.h b/include/msvc/Hacl_Hash_Blake2s.h index ac783473..870f1edc 100644 --- a/include/msvc/Hacl_Hash_Blake2s.h +++ b/include/msvc/Hacl_Hash_Blake2s.h @@ -38,6 +38,16 @@ extern "C" { #include "Hacl_Streaming_Types.h" #include "Hacl_Hash_Blake2b.h" +#define HACL_HASH_BLAKE2S_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_PERSONAL_BYTES (8U) + typedef struct K____uint32_t___uint32_t__s { uint32_t *fst; @@ -49,7 +59,8 @@ typedef struct Hacl_Hash_Blake2s_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint32_t___uint32_t_ thd; + bool thd; + K____uint32_t___uint32_t_ f3; } Hacl_Hash_Blake2s_block_state_t; @@ -62,30 +73,53 @@ typedef struct Hacl_Hash_Blake2s_state_t_s Hacl_Hash_Blake2s_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) 
+ General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -95,28 +129,44 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. 
This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s); /** Free state function when there is no key @@ -124,7 +174,7 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state); /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state); @@ -148,8 +198,14 @@ Hacl_Hash_Blake2s_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
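The streaming flow documented in this header (allocate, update in chunks, digest, free) can be summarized with a short sketch; it only uses names that appear in this file, and the helper itself is illustrative.

#include <stdint.h>
#include "Hacl_Hash_Blake2s.h"

/* Sketch: default-parameter BLAKE2s over a single chunk. update returns
 * 0 on success (1 if the maximum input length is exceeded); digest
 * returns the digest length, 32 bytes for the default parameters. */
static void blake2s_streaming_sketch(uint8_t *msg, uint32_t msg_len)
{
  uint8_t out[HACL_HASH_BLAKE2S_OUT_BYTES];
  Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc();
  Hacl_Streaming_Types_error_code err = Hacl_Hash_Blake2s_update(st, msg, msg_len);
  uint8_t written = Hacl_Hash_Blake2s_digest(st, out);   /* written == 32 here */
  Hacl_Hash_Blake2s_free(st);
  (void)err;
  (void)written;
}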
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_Blake2s_Simd128.h b/include/msvc/Hacl_Hash_Blake2s_Simd128.h index d725ee86..2bae1c8e 100644 --- a/include/msvc/Hacl_Hash_Blake2s_Simd128.h +++ b/include/msvc/Hacl_Hash_Blake2s_Simd128.h @@ -39,6 +39,16 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" +#define HACL_HASH_BLAKE2S_SIMD128_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_SIMD128_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_SIMD128_PERSONAL_BYTES (8U) + typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s { Lib_IntVector_Intrinsics_vec128 *fst; @@ -50,7 +60,8 @@ typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ thd; + bool thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ f3; } Hacl_Hash_Blake2s_Simd128_block_state_t; @@ -63,34 +74,54 @@ typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s Hacl_Hash_Blake2s_Simd128_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -100,21 +131,27 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -124,10 +161,19 @@ Hacl_Hash_Blake2s_Simd128_update( ); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s); /** Free state function when there is no key @@ -135,7 +181,7 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state); /** - Copying. 
The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state); @@ -160,8 +206,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_SHA3.h b/include/msvc/Hacl_Hash_SHA3.h index 8fb78fcd..18f23d8d 100644 --- a/include/msvc/Hacl_Hash_SHA3.h +++ b/include/msvc/Hacl_Hash_SHA3.h @@ -117,7 +117,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s); Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -131,14 +131,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index 617e8e34..72162d43 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -139,12 +139,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s); Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -161,15 +161,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. 
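To make the absorb/squeeze split concrete, here is a minimal sketch using the single-buffer SHAKE128 variants from Hacl_Hash_SHA3.h documented above; the quadruple-buffer functions in this header follow the same pattern with four input and four output buffers. Only `Hacl_Hash_SHA3_state_free` and `Hacl_Hash_SHA3_shake128_absorb_nblocks` appear verbatim in this patch; the `state_malloc`, `absorb_final`, and `squeeze_nblocks` names are assumptions about the rest of the API.

  #include "Hacl_Hash_SHA3.h"

  /* Absorb `len` bytes of `input` into a fresh SHAKE128 state, then squeeze
     `out_len` bytes (assumed here to be a multiple of the 168-byte rate). */
  static void example_shake128(uint8_t *input, uint32_t len, uint8_t *out, uint32_t out_len)
  {
    uint64_t *st = Hacl_Hash_SHA3_state_malloc();               /* assumed allocator */
    /* Processes only the full 168-byte blocks; the partial tail is ignored. */
    Hacl_Hash_SHA3_shake128_absorb_nblocks(st, input, len);
    /* Processes the trailing partial block; the full buffer length is passed again. */
    Hacl_Hash_SHA3_shake128_absorb_final(st, input, len);       /* assumed name */
    Hacl_Hash_SHA3_shake128_squeeze_nblocks(st, out, out_len);  /* assumed name */
    Hacl_Hash_SHA3_state_free(st);
  }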
- It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -192,7 +192,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/include/msvc/internal/Hacl_Hash_Blake2b.h b/include/msvc/internal/Hacl_Hash_Blake2b.h index 6928d205..2dad4b01 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2b.h +++ b/include/msvc/internal/Hacl_Hash_Blake2b.h @@ -38,12 +38,12 @@ extern "C" { #include "internal/Hacl_Impl_Blake2_Constants.h" #include "../Hacl_Hash_Blake2b.h" -typedef struct Hacl_Hash_Blake2b_index_s +typedef struct Hacl_Hash_Blake2b_params_and_key_s { - uint8_t key_length; - uint8_t digest_length; + Hacl_Hash_Blake2b_blake2_params *fst; + uint8_t *snd; } -Hacl_Hash_Blake2b_index; +Hacl_Hash_Blake2b_params_and_key; void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn); @@ -62,6 +62,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -69,13 +70,6 @@ Hacl_Hash_Blake2b_update_last( void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash); -typedef struct K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t__s -{ - Hacl_Hash_Blake2b_blake2_params *fst; - uint8_t *snd; -} -K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_; - #if defined(__cplusplus) } #endif diff --git a/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h b/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h index 4dd986b2..04b091fc 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h +++ b/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d diff --git a/include/msvc/internal/Hacl_Hash_Blake2s.h b/include/msvc/internal/Hacl_Hash_Blake2s.h index eccd92de..279c472e 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2s.h +++ b/include/msvc/internal/Hacl_Hash_Blake2s.h @@ -56,6 +56,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h b/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h index 2c422949..77505dc2 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, 
Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/karamel/include/krml/internal/target.h b/karamel/include/krml/internal/target.h index d4252a10..8e00f2fd 100644 --- a/karamel/include/krml/internal/target.h +++ b/karamel/include/krml/internal/target.h @@ -69,6 +69,14 @@ # endif #endif +#ifndef KRML_ATTRIBUTE_TARGET +# if defined(__GNUC__) +# define KRML_ATTRIBUTE_TARGET(x) __attribute__((target(x))) +# else +# define KRML_ATTRIBUTE_TARGET(x) +# endif +#endif + #ifndef KRML_NOINLINE # if defined(_MSC_VER) # define KRML_NOINLINE __declspec(noinline) @@ -82,6 +90,18 @@ # endif #endif +#ifndef KRML_MUSTINLINE +# if defined(_MSC_VER) +# define KRML_MUSTINLINE inline __forceinline +# elif defined (__GNUC__) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) +# else +# define KRML_MUSTINLINE inline +# warning "The KRML_MUSTINLINE macro defaults to plain inline for this toolchain!" +# warning "Please locate target.h and try to fill it out with a suitable definition for this compiler." +# endif +#endif + #ifndef KRML_PRE_ALIGN # ifdef _MSC_VER # define KRML_PRE_ALIGN(X) __declspec(align(X)) diff --git a/ocaml/ctypes.depend b/ocaml/ctypes.depend index d94fad90..79cea4b2 100644 --- a/ocaml/ctypes.depend +++ b/ocaml/ctypes.depend @@ -1,4 +1,4 @@ -CTYPES_DEPS=lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Spec_stubs.cmx lib/Hacl_Spec_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2s_stubs.cmx lib/Hacl_Hash_Blake2s_bindings.cmx lib/Hacl_Hash_Blake2b_Simd256_stubs.cmx lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib/Hacl_Hash_Blake2s_Simd128_stubs.cmx lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib/Hacl_Hash_Base_stubs.cmx lib/Hacl_Hash_Base_bindings.cmx lib/Hacl_Hash_SHA1_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmx lib/Hacl_Hash_SHA2_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmx lib/Hacl_HMAC_stubs.cmx lib/Hacl_HMAC_bindings.cmx lib/Hacl_HMAC_Blake2s_128_stubs.cmx lib/Hacl_HMAC_Blake2s_128_bindings.cmx lib/Hacl_HMAC_Blake2b_256_stubs.cmx lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib/Hacl_Hash_SHA3_stubs.cmx lib/Hacl_Hash_SHA3_bindings.cmx lib/Hacl_SHA2_Types_stubs.cmx lib/Hacl_SHA2_Types_bindings.cmx lib/Hacl_Hash_SHA3_Simd256_stubs.cmx lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib/Hacl_Hash_MD5_stubs.cmx lib/Hacl_Hash_MD5_bindings.cmx lib/EverCrypt_Error_stubs.cmx lib/EverCrypt_Error_bindings.cmx lib/EverCrypt_AutoConfig2_stubs.cmx lib/EverCrypt_AutoConfig2_bindings.cmx lib/EverCrypt_Hash_stubs.cmx lib/EverCrypt_Hash_bindings.cmx lib/Hacl_Chacha20_stubs.cmx lib/Hacl_Chacha20_bindings.cmx lib/Hacl_Salsa20_stubs.cmx lib/Hacl_Salsa20_bindings.cmx lib/Hacl_Bignum_Base_stubs.cmx lib/Hacl_Bignum_Base_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_bindings.cmx lib/Hacl_Curve25519_64_stubs.cmx lib/Hacl_Curve25519_64_bindings.cmx lib/Hacl_Bignum25519_51_stubs.cmx lib/Hacl_Bignum25519_51_bindings.cmx lib/Hacl_Curve25519_51_stubs.cmx lib/Hacl_Curve25519_51_bindings.cmx lib/Hacl_MAC_Poly1305_stubs.cmx lib/Hacl_MAC_Poly1305_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_bindings.cmx lib/Hacl_MAC_Poly1305_Simd128_stubs.cmx lib/Hacl_MAC_Poly1305_Simd128_bindings.cmx lib/Hacl_Chacha20_Vec128_stubs.cmx lib/Hacl_Chacha20_Vec128_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_bindings.cmx lib/Hacl_MAC_Poly1305_Simd256_stubs.cmx 
lib/Hacl_MAC_Poly1305_Simd256_bindings.cmx lib/Hacl_Chacha20_Vec256_stubs.cmx lib/Hacl_Chacha20_Vec256_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_bindings.cmx lib/Hacl_Ed25519_stubs.cmx lib/Hacl_Ed25519_bindings.cmx lib/Hacl_NaCl_stubs.cmx lib/Hacl_NaCl_bindings.cmx lib/Hacl_P256_stubs.cmx lib/Hacl_P256_bindings.cmx lib/Hacl_Bignum_K256_stubs.cmx lib/Hacl_Bignum_K256_bindings.cmx lib/Hacl_K256_ECDSA_stubs.cmx lib/Hacl_K256_ECDSA_bindings.cmx lib/Hacl_Frodo_KEM_stubs.cmx lib/Hacl_Frodo_KEM_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_IntTypes_Intrinsics_stubs.cmx lib/Hacl_IntTypes_Intrinsics_bindings.cmx lib/Hacl_IntTypes_Intrinsics_128_stubs.cmx lib/Hacl_IntTypes_Intrinsics_128_bindings.cmx lib/Hacl_RSAPSS_stubs.cmx lib/Hacl_RSAPSS_bindings.cmx lib/Hacl_FFDHE_stubs.cmx lib/Hacl_FFDHE_bindings.cmx lib/Hacl_Frodo640_stubs.cmx lib/Hacl_Frodo640_bindings.cmx lib/Hacl_HKDF_stubs.cmx lib/Hacl_HKDF_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_bindings.cmx lib/EverCrypt_Cipher_stubs.cmx lib/EverCrypt_Cipher_bindings.cmx lib/Hacl_GenericField32_stubs.cmx lib/Hacl_GenericField32_bindings.cmx lib/Hacl_SHA2_Vec256_stubs.cmx lib/Hacl_SHA2_Vec256_bindings.cmx lib/Hacl_EC_K256_stubs.cmx lib/Hacl_EC_K256_bindings.cmx lib/Hacl_Bignum4096_stubs.cmx lib/Hacl_Bignum4096_bindings.cmx lib/Hacl_Chacha20_Vec32_stubs.cmx lib/Hacl_Chacha20_Vec32_bindings.cmx lib/EverCrypt_Ed25519_stubs.cmx lib/EverCrypt_Ed25519_bindings.cmx lib/Hacl_Bignum4096_32_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmx lib/EverCrypt_HMAC_stubs.cmx lib/EverCrypt_HMAC_bindings.cmx lib/Hacl_HMAC_DRBG_stubs.cmx lib/Hacl_HMAC_DRBG_bindings.cmx lib/EverCrypt_DRBG_stubs.cmx lib/EverCrypt_DRBG_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP128_SHA256_bindings.cmx lib/EverCrypt_Curve25519_stubs.cmx lib/EverCrypt_Curve25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_bindings.cmx lib/Hacl_Frodo976_stubs.cmx lib/Hacl_Frodo976_bindings.cmx lib/Hacl_HKDF_Blake2s_128_stubs.cmx lib/Hacl_HKDF_Blake2s_128_bindings.cmx lib/Hacl_GenericField64_stubs.cmx lib/Hacl_GenericField64_bindings.cmx lib/Hacl_Frodo1344_stubs.cmx lib/Hacl_Frodo1344_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_bindings.cmx lib/Hacl_Bignum32_stubs.cmx lib/Hacl_Bignum32_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_bindings.cmx lib/Hacl_Bignum256_32_stubs.cmx lib/Hacl_Bignum256_32_bindings.cmx lib/Hacl_SHA2_Vec128_stubs.cmx lib/Hacl_SHA2_Vec128_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib/EverCrypt_Poly1305_stubs.cmx lib/EverCrypt_Poly1305_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_P256_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP32_SHA256_bindings.cmx lib/Hacl_Bignum64_stubs.cmx lib/Hacl_Bignum64_bindings.cmx 
lib/Hacl_Frodo64_stubs.cmx lib/Hacl_Frodo64_bindings.cmx lib/Hacl_HKDF_Blake2b_256_stubs.cmx lib/Hacl_HKDF_Blake2b_256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_bindings.cmx lib/EverCrypt_HKDF_stubs.cmx lib/EverCrypt_HKDF_bindings.cmx lib/Hacl_EC_Ed25519_stubs.cmx lib/Hacl_EC_Ed25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_bindings.cmx lib/EverCrypt_Chacha20Poly1305_stubs.cmx lib/EverCrypt_Chacha20Poly1305_bindings.cmx lib/EverCrypt_AEAD_stubs.cmx lib/EverCrypt_AEAD_bindings.cmx lib/Hacl_Bignum256_stubs.cmx lib/Hacl_Bignum256_bindings.cmx +CTYPES_DEPS=lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Spec_stubs.cmx lib/Hacl_Spec_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2s_stubs.cmx lib/Hacl_Hash_Blake2s_bindings.cmx lib/Hacl_Hash_Blake2b_Simd256_stubs.cmx lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib/Hacl_Hash_Blake2s_Simd128_stubs.cmx lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib/Hacl_Hash_Base_stubs.cmx lib/Hacl_Hash_Base_bindings.cmx lib/Hacl_Hash_SHA1_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmx lib/Hacl_Hash_SHA2_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmx lib/Hacl_HMAC_stubs.cmx lib/Hacl_HMAC_bindings.cmx lib/Hacl_HMAC_Blake2s_128_stubs.cmx lib/Hacl_HMAC_Blake2s_128_bindings.cmx lib/Hacl_HMAC_Blake2b_256_stubs.cmx lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib/Hacl_Hash_SHA3_stubs.cmx lib/Hacl_Hash_SHA3_bindings.cmx lib/Hacl_SHA2_Types_stubs.cmx lib/Hacl_SHA2_Types_bindings.cmx lib/Hacl_Hash_SHA3_Simd256_stubs.cmx lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib/Hacl_Hash_MD5_stubs.cmx lib/Hacl_Hash_MD5_bindings.cmx lib/EverCrypt_Error_stubs.cmx lib/EverCrypt_Error_bindings.cmx lib/EverCrypt_AutoConfig2_stubs.cmx lib/EverCrypt_AutoConfig2_bindings.cmx lib/EverCrypt_Hash_stubs.cmx lib/EverCrypt_Hash_bindings.cmx lib/Hacl_Chacha20_stubs.cmx lib/Hacl_Chacha20_bindings.cmx lib/Hacl_Salsa20_stubs.cmx lib/Hacl_Salsa20_bindings.cmx lib/Hacl_Bignum_Base_stubs.cmx lib/Hacl_Bignum_Base_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_bindings.cmx lib/Hacl_Curve25519_64_stubs.cmx lib/Hacl_Curve25519_64_bindings.cmx lib/Hacl_Bignum25519_51_stubs.cmx lib/Hacl_Bignum25519_51_bindings.cmx lib/Hacl_Curve25519_51_stubs.cmx lib/Hacl_Curve25519_51_bindings.cmx lib/Hacl_MAC_Poly1305_stubs.cmx lib/Hacl_MAC_Poly1305_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_bindings.cmx lib/Hacl_MAC_Poly1305_Simd128_stubs.cmx lib/Hacl_MAC_Poly1305_Simd128_bindings.cmx lib/Hacl_Chacha20_Vec128_stubs.cmx lib/Hacl_Chacha20_Vec128_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_bindings.cmx lib/Hacl_MAC_Poly1305_Simd256_stubs.cmx lib/Hacl_MAC_Poly1305_Simd256_bindings.cmx lib/Hacl_Chacha20_Vec256_stubs.cmx lib/Hacl_Chacha20_Vec256_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_bindings.cmx lib/Hacl_Ed25519_stubs.cmx lib/Hacl_Ed25519_bindings.cmx lib/Hacl_NaCl_stubs.cmx lib/Hacl_NaCl_bindings.cmx lib/Hacl_P256_stubs.cmx lib/Hacl_P256_bindings.cmx lib/Hacl_Bignum_K256_stubs.cmx lib/Hacl_Bignum_K256_bindings.cmx lib/Hacl_K256_ECDSA_stubs.cmx lib/Hacl_K256_ECDSA_bindings.cmx lib/Hacl_Frodo_KEM_stubs.cmx lib/Hacl_Frodo_KEM_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx 
lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_IntTypes_Intrinsics_stubs.cmx lib/Hacl_IntTypes_Intrinsics_bindings.cmx lib/Hacl_IntTypes_Intrinsics_128_stubs.cmx lib/Hacl_IntTypes_Intrinsics_128_bindings.cmx lib/Hacl_RSAPSS_stubs.cmx lib/Hacl_RSAPSS_bindings.cmx lib/Hacl_FFDHE_stubs.cmx lib/Hacl_FFDHE_bindings.cmx lib/Hacl_Frodo640_stubs.cmx lib/Hacl_Frodo640_bindings.cmx lib/Hacl_HKDF_stubs.cmx lib/Hacl_HKDF_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_bindings.cmx lib/EverCrypt_Cipher_stubs.cmx lib/EverCrypt_Cipher_bindings.cmx lib/Hacl_GenericField32_stubs.cmx lib/Hacl_GenericField32_bindings.cmx lib/Hacl_SHA2_Vec256_stubs.cmx lib/Hacl_SHA2_Vec256_bindings.cmx lib/Hacl_EC_K256_stubs.cmx lib/Hacl_EC_K256_bindings.cmx lib/Hacl_Bignum4096_stubs.cmx lib/Hacl_Bignum4096_bindings.cmx lib/Hacl_Chacha20_Vec32_stubs.cmx lib/Hacl_Chacha20_Vec32_bindings.cmx lib/EverCrypt_Ed25519_stubs.cmx lib/EverCrypt_Ed25519_bindings.cmx lib/Hacl_Bignum4096_32_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmx lib/EverCrypt_HMAC_stubs.cmx lib/EverCrypt_HMAC_bindings.cmx lib/Hacl_HMAC_DRBG_stubs.cmx lib/Hacl_HMAC_DRBG_bindings.cmx lib/EverCrypt_DRBG_stubs.cmx lib/EverCrypt_DRBG_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP128_SHA256_bindings.cmx lib/EverCrypt_Curve25519_stubs.cmx lib/EverCrypt_Curve25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_bindings.cmx lib/Hacl_Frodo976_stubs.cmx lib/Hacl_Frodo976_bindings.cmx lib/Hacl_HKDF_Blake2s_128_stubs.cmx lib/Hacl_HKDF_Blake2s_128_bindings.cmx lib/Hacl_GenericField64_stubs.cmx lib/Hacl_GenericField64_bindings.cmx lib/Hacl_Frodo1344_stubs.cmx lib/Hacl_Frodo1344_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_bindings.cmx lib/Hacl_Bignum32_stubs.cmx lib/Hacl_Bignum32_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_bindings.cmx lib/Hacl_Bignum256_32_stubs.cmx lib/Hacl_Bignum256_32_bindings.cmx lib/Hacl_SHA2_Vec128_stubs.cmx lib/Hacl_SHA2_Vec128_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib/EverCrypt_Poly1305_stubs.cmx lib/EverCrypt_Poly1305_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_P256_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP32_SHA256_bindings.cmx lib/Hacl_Bignum64_stubs.cmx lib/Hacl_Bignum64_bindings.cmx lib/Hacl_Frodo64_stubs.cmx lib/Hacl_Frodo64_bindings.cmx lib/Hacl_HKDF_Blake2b_256_stubs.cmx lib/Hacl_HKDF_Blake2b_256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_bindings.cmx lib/EverCrypt_HKDF_stubs.cmx lib/EverCrypt_HKDF_bindings.cmx lib/Hacl_EC_Ed25519_stubs.cmx lib/Hacl_EC_Ed25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_bindings.cmx lib/EverCrypt_Chacha20Poly1305_stubs.cmx lib/EverCrypt_Chacha20Poly1305_bindings.cmx 
lib/EverCrypt_AEAD_stubs.cmx lib/EverCrypt_AEAD_bindings.cmx lib/Hacl_Bignum256_stubs.cmx lib/Hacl_Bignum256_bindings.cmx lib/Hacl_Streaming_Types_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmo: lib_gen/Hacl_Streaming_Types_gen.cmx: lib/Hacl_Streaming_Types_bindings.cmx @@ -295,14 +295,14 @@ lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Imp lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo lib_gen/Hacl_HPKE_Curve51_CP32_SHA256_gen.cmx: lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve51_CP32_SHA256_gen.exe: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_c_stubs.o lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve51_CP32_SHA256_gen.cmx -lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx -lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo -lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx: lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx -lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.exe: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_c_stubs.o lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx lib/EverCrypt_Poly1305_bindings.cmx: lib/EverCrypt_Poly1305_bindings.cmo: lib_gen/EverCrypt_Poly1305_gen.cmx: lib/EverCrypt_Poly1305_bindings.cmx lib_gen/EverCrypt_Poly1305_gen.exe: lib/EverCrypt_Poly1305_bindings.cmx lib_gen/EverCrypt_Poly1305_gen.cmx +lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx +lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo +lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx: lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx +lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.exe: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_c_stubs.o lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo lib_gen/Hacl_HPKE_Curve51_CP32_SHA512_gen.cmx: lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx diff --git a/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml index 1c132a7a..8fdc5be6 100644 --- a/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml @@ -15,8 +15,8 
@@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas" + let hacl_Hash_Blake2b_Simd256_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml index 7ba4fcf6..b32dfb66 100644 --- a/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml @@ -39,7 +39,20 @@ module Bindings(F:Cstubs.FOREIGN) = field hacl_Hash_Blake2b_index "key_length" uint8_t let hacl_Hash_Blake2b_index_digest_length = field hacl_Hash_Blake2b_index "digest_length" uint8_t + let hacl_Hash_Blake2b_index_last_node = + field hacl_Hash_Blake2b_index "last_node" bool let _ = seal hacl_Hash_Blake2b_index + type hacl_Hash_Blake2b_params_and_key = + [ `hacl_Hash_Blake2b_params_and_key ] structure + let (hacl_Hash_Blake2b_params_and_key : + [ `hacl_Hash_Blake2b_params_and_key ] structure typ) = + structure "Hacl_Hash_Blake2b_params_and_key_s" + let hacl_Hash_Blake2b_params_and_key_fst = + field hacl_Hash_Blake2b_params_and_key "fst" + (ptr hacl_Hash_Blake2b_blake2_params) + let hacl_Hash_Blake2b_params_and_key_snd = + field hacl_Hash_Blake2b_params_and_key "snd" (ptr uint8_t) + let _ = seal hacl_Hash_Blake2b_params_and_key let hacl_Hash_Blake2b_init = foreign "Hacl_Hash_Blake2b_init" ((ptr uint64_t) @-> (uint32_t @-> (uint32_t @-> (returning void)))) @@ -65,7 +78,9 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_block_state_t_snd = field hacl_Hash_Blake2b_block_state_t "snd" uint8_t let hacl_Hash_Blake2b_block_state_t_thd = - field hacl_Hash_Blake2b_block_state_t "thd" k____uint64_t___uint64_t_ + field hacl_Hash_Blake2b_block_state_t "thd" bool + let hacl_Hash_Blake2b_block_state_t_f3 = + field hacl_Hash_Blake2b_block_state_t "f3" k____uint64_t___uint64_t_ let _ = seal hacl_Hash_Blake2b_block_state_t type hacl_Hash_Blake2b_state_t = [ `hacl_Hash_Blake2b_state_t ] structure let (hacl_Hash_Blake2b_state_t : @@ -82,7 +97,8 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_malloc_with_params_and_key = foreign "Hacl_Hash_Blake2b_malloc_with_params_and_key" ((ptr hacl_Hash_Blake2b_blake2_params) @-> - (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2b_state_t)))) + (bool @-> + (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2b_state_t))))) let hacl_Hash_Blake2b_malloc_with_key = foreign "Hacl_Hash_Blake2b_malloc_with_key" (ocaml_bytes @-> @@ -110,7 +126,11 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_digest = foreign "Hacl_Hash_Blake2b_digest" ((ptr hacl_Hash_Blake2b_state_t) @-> - (ocaml_bytes @-> (returning void))) + (ocaml_bytes @-> (returning uint8_t))) + let hacl_Hash_Blake2b_info = + foreign "Hacl_Hash_Blake2b_info" + ((ptr hacl_Hash_Blake2b_state_t) @-> + (returning hacl_Hash_Blake2b_index)) let hacl_Hash_Blake2b_free = foreign "Hacl_Hash_Blake2b_free" ((ptr hacl_Hash_Blake2b_state_t) @-> (returning void)) @@ -125,8 +145,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2b_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2b_hash_with_key_and_paramas" + let hacl_Hash_Blake2b_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2b_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> 
diff --git a/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml index 6533ddbc..75fbbf39 100644 --- a/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml @@ -15,8 +15,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas" + let hacl_Hash_Blake2s_Simd128_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml index f6c93e89..34336a6c 100644 --- a/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml @@ -23,8 +23,9 @@ module Bindings(F:Cstubs.FOREIGN) = (uint32_t @-> ((ptr uint32_t) @-> ((ptr uint32_t) @-> - (uint64_t @-> - (uint32_t @-> (ocaml_bytes @-> (returning void))))))) + (bool @-> + (uint64_t @-> + (uint32_t @-> (ocaml_bytes @-> (returning void)))))))) let hacl_Hash_Blake2s_finish = foreign "Hacl_Hash_Blake2s_finish" (uint32_t @-> (ocaml_bytes @-> ((ptr uint32_t) @-> (returning void)))) @@ -47,7 +48,9 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_block_state_t_snd = field hacl_Hash_Blake2s_block_state_t "snd" uint8_t let hacl_Hash_Blake2s_block_state_t_thd = - field hacl_Hash_Blake2s_block_state_t "thd" k____uint32_t___uint32_t_ + field hacl_Hash_Blake2s_block_state_t "thd" bool + let hacl_Hash_Blake2s_block_state_t_f3 = + field hacl_Hash_Blake2s_block_state_t "f3" k____uint32_t___uint32_t_ let _ = seal hacl_Hash_Blake2s_block_state_t type hacl_Hash_Blake2s_state_t = [ `hacl_Hash_Blake2s_state_t ] structure let (hacl_Hash_Blake2s_state_t : @@ -64,7 +67,8 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_malloc_with_params_and_key = foreign "Hacl_Hash_Blake2s_malloc_with_params_and_key" ((ptr hacl_Hash_Blake2b_blake2_params) @-> - (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2s_state_t)))) + (bool @-> + (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2s_state_t))))) let hacl_Hash_Blake2s_malloc_with_key = foreign "Hacl_Hash_Blake2s_malloc_with_key" (ocaml_bytes @-> @@ -92,7 +96,11 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_digest = foreign "Hacl_Hash_Blake2s_digest" ((ptr hacl_Hash_Blake2s_state_t) @-> - (ocaml_bytes @-> (returning void))) + (ocaml_bytes @-> (returning uint8_t))) + let hacl_Hash_Blake2s_info = + foreign "Hacl_Hash_Blake2s_info" + ((ptr hacl_Hash_Blake2s_state_t) @-> + (returning hacl_Hash_Blake2b_index)) let hacl_Hash_Blake2s_free = foreign "Hacl_Hash_Blake2s_free" ((ptr hacl_Hash_Blake2s_state_t) @-> (returning void)) @@ -107,8 +115,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2s_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2s_hash_with_key_and_paramas" + let hacl_Hash_Blake2s_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2s_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/src/EverCrypt_HMAC.c b/src/EverCrypt_HMAC.c index 90bcaaac..ea3233fc 100644 --- a/src/EverCrypt_HMAC.c +++ b/src/EverCrypt_HMAC.c @@ -124,7 +124,6 @@ EverCrypt_HMAC_compute_sha1( opad[i] = (uint32_t)xi ^ (uint32_t)yi; } uint32_t s[5U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U, 0xc3d2e1f0U }; - 
uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA1_update_last(s, 0ULL, ipad, 64U); @@ -153,6 +152,7 @@ EverCrypt_HMAC_compute_sha1( Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA1_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA1_init(s); @@ -236,11 +236,10 @@ EverCrypt_HMAC_compute_sha2_256( 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha256_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); @@ -272,6 +271,7 @@ EverCrypt_HMAC_compute_sha2_256( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha256_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha256_init(s); @@ -358,11 +358,10 @@ EverCrypt_HMAC_compute_sha2_384( 0U, 8U, 1U, - uint64_t *os = st; uint64_t x = Hacl_Hash_SHA2_h384[i]; + uint64_t *os = st; os[i] = x;); uint64_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), @@ -400,6 +399,7 @@ EverCrypt_HMAC_compute_sha2_384( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha384_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha384_init(s); @@ -488,11 +488,10 @@ EverCrypt_HMAC_compute_sha2_512( 0U, 8U, 1U, - uint64_t *os = st; uint64_t x = Hacl_Hash_SHA2_h512[i]; + uint64_t *os = st; os[i] = x;); uint64_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), @@ -530,6 +529,7 @@ EverCrypt_HMAC_compute_sha2_512( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha512_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha512_init(s); @@ -616,11 +616,10 @@ EverCrypt_HMAC_compute_blake2s( uint32_t s[16U] = { 0U }; Hacl_Hash_Blake2s_init(s, 0U, 32U); uint32_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -655,10 +654,12 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2s_finish(32U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2s_init(s0, 0U, 32U); @@ -693,6 +694,7 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -753,11 +755,16 @@ EverCrypt_HMAC_compute_blake2b( uint64_t s[16U] = { 0U }; Hacl_Hash_Blake2b_init(s, 0U, 64U); uint64_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -792,11 +799,13 @@ EverCrypt_HMAC_compute_blake2b( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2b_finish(64U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2b_init(s0, 0U, 64U); @@ -831,6 +840,7 @@ EverCrypt_HMAC_compute_blake2b( 
Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/EverCrypt_Hash.c b/src/EverCrypt_Hash.c index bfafa9be..64bd18d6 100644 --- a/src/EverCrypt_Hash.c +++ b/src/EverCrypt_Hash.c @@ -616,7 +616,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ { uint32_t *p1 = scrut.case_Blake2S_s; uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; } if (scrut.tag == Blake2S_128_s) @@ -624,7 +624,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Lib_IntVector_Intrinsics_vec128 *p1 = scrut.case_Blake2S_128_s; #if HACL_CAN_COMPILE_VEC128 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; #else KRML_MAYBE_UNUSED_VAR(p1); @@ -638,6 +638,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); @@ -651,6 +652,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_Simd256_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); @@ -1305,6 +1307,7 @@ EverCrypt_Hash_Incremental_state_t KRML_CHECK_SIZE(sizeof (uint8_t), block_len(a)); uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(block_len(a), sizeof (uint8_t)); EverCrypt_Hash_state_s *block_state = create_in(a); + init(block_state); EverCrypt_Hash_Incremental_state_t s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; EverCrypt_Hash_Incremental_state_t @@ -1313,7 +1316,6 @@ EverCrypt_Hash_Incremental_state_t EverCrypt_Hash_Incremental_state_t )); p[0U] = s; - init(block_state); return p; } @@ -1322,15 +1324,11 @@ Reset an existing state to the initial hash state with empty data. 
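For instance, the same state can be reused across messages by resetting it between digests. A minimal sketch follows; only `update` and `reset` appear in this hunk, so the `malloc`, `digest`, and `free` names (and the algorithm constant passed to `malloc`) are assumptions based on the surrounding incremental API.

  #include "EverCrypt_Hash.h"

  static void example_two_sha256_digests(uint8_t *m1, uint32_t l1,
                                         uint8_t *m2, uint32_t l2,
                                         uint8_t d1[32U], uint8_t d2[32U])
  {
    EverCrypt_Hash_Incremental_state_t *st =
      EverCrypt_Hash_Incremental_malloc(Spec_Hash_Definitions_SHA2_256); /* assumed */
    EverCrypt_Hash_Incremental_update(st, m1, l1);
    EverCrypt_Hash_Incremental_digest(st, d1);   /* assumed */
    EverCrypt_Hash_Incremental_reset(st);        /* back to the empty initial hash state */
    EverCrypt_Hash_Incremental_update(st, m2, l2);
    EverCrypt_Hash_Incremental_digest(st, d2);   /* assumed */
    EverCrypt_Hash_Incremental_free(st);         /* assumed */
  }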
*/ void EverCrypt_Hash_Incremental_reset(EverCrypt_Hash_Incremental_state_t *state) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - uint8_t *buf = scrut.buf; - EverCrypt_Hash_state_s *block_state = scrut.block_state; + EverCrypt_Hash_state_s *block_state = (*state).block_state; Spec_Hash_Definitions_hash_alg i = alg_of_state(block_state); KRML_MAYBE_UNUSED_VAR(i); init(block_state); - EverCrypt_Hash_Incremental_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } /** @@ -1347,9 +1345,8 @@ EverCrypt_Hash_Incremental_update( uint32_t chunk_len ) { - EverCrypt_Hash_Incremental_state_t s = *state; - EverCrypt_Hash_state_s *block_state = s.block_state; - uint64_t total_len = s.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; Spec_Hash_Definitions_hash_alg i1 = alg_of_state(block_state); uint64_t sw; switch (i1) @@ -1448,10 +1445,8 @@ EverCrypt_Hash_Incremental_update( } if (chunk_len <= block_len(i1) - sz) { - EverCrypt_Hash_Incremental_state_t s1 = *state; - EverCrypt_Hash_state_s *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i1) == 0ULL && total_len1 > 0ULL) { @@ -1464,22 +1459,12 @@ EverCrypt_Hash_Incremental_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - EverCrypt_Hash_Incremental_state_t s1 = *state; - EverCrypt_Hash_state_s *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i1) == 0ULL && total_len1 > 0ULL) { @@ -1492,7 +1477,7 @@ EverCrypt_Hash_Incremental_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - update_multi(block_state1, prevlen, buf, block_len(i1)); + update_multi(block_state, prevlen, buf, block_len(i1)); } uint32_t ite0; if ((uint64_t)chunk_len % (uint64_t)block_len(i1) == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1508,28 +1493,18 @@ EverCrypt_Hash_Incremental_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - update_multi(block_state1, total_len1, data1, data1_len); + update_multi(block_state, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = block_len(i1) - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - EverCrypt_Hash_Incremental_state_t s1 = *state; - EverCrypt_Hash_state_s *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)block_len(i1) == 0ULL && total_len10 > 0ULL) { @@ -1539,22 +1514,12 @@ EverCrypt_Hash_Incremental_update( { sz10 = 
(uint32_t)(total_len10 % (uint64_t)block_len(i1)); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - EverCrypt_Hash_Incremental_state_t s10 = *state; - EverCrypt_Hash_state_s *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i1) == 0ULL && total_len1 > 0ULL) { @@ -1567,7 +1532,7 @@ EverCrypt_Hash_Incremental_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - update_multi(block_state1, prevlen, buf, block_len(i1)); + update_multi(block_state, prevlen, buf0, block_len(i1)); } uint32_t ite0; if @@ -1589,18 +1554,10 @@ EverCrypt_Hash_Incremental_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - update_multi(block_state1, total_len1, data1, data1_len); - uint8_t *dst = buf; + update_multi(block_state, total_len1, data1, data1_len); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } ite = Hacl_Streaming_Types_Success; } @@ -1624,10 +1581,9 @@ EverCrypt_Hash_Incremental_update( static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_MD5) == 0ULL && total_len > 0ULL) { @@ -1643,6 +1599,7 @@ static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outpu EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_MD5) == 0U && r > 0U) { @@ -1653,7 +1610,6 @@ static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outpu ite = r % block_len(Spec_Hash_Definitions_MD5); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1662,10 +1618,9 @@ static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outpu static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA1) == 0ULL && total_len > 0ULL) { @@ 
-1681,6 +1636,7 @@ static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outp EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA1) == 0U && r > 0U) { @@ -1691,7 +1647,6 @@ static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outp ite = r % block_len(Spec_Hash_Definitions_SHA1); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1700,10 +1655,9 @@ static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outp static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_224) == 0ULL && total_len > 0ULL) @@ -1720,6 +1674,7 @@ static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_224) == 0U && r > 0U) { @@ -1730,7 +1685,6 @@ static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_224); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1739,10 +1693,9 @@ static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_256) == 0ULL && total_len > 0ULL) @@ -1759,6 +1712,7 @@ static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_256) == 0U && r > 0U) { @@ -1769,7 +1723,6 @@ static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_256); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1778,10 +1731,9 @@ static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void 
digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_224) == 0ULL && total_len > 0ULL) @@ -1798,6 +1750,7 @@ static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_224) == 0U && r > 0U) { @@ -1808,7 +1761,6 @@ static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_224); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1817,10 +1769,9 @@ static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_256) == 0ULL && total_len > 0ULL) @@ -1837,6 +1788,7 @@ static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_256) == 0U && r > 0U) { @@ -1847,7 +1799,6 @@ static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_256); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1856,10 +1807,9 @@ static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_384) == 0ULL && total_len > 0ULL) @@ -1876,6 +1826,7 @@ static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_384) == 0U && r > 0U) { 
@@ -1886,7 +1837,6 @@ static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_384); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1895,10 +1845,9 @@ static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_512) == 0ULL && total_len > 0ULL) @@ -1915,6 +1864,7 @@ static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_512) == 0U && r > 0U) { @@ -1925,7 +1875,6 @@ static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_512); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1934,10 +1883,9 @@ static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_384) == 0ULL && total_len > 0ULL) @@ -1954,6 +1902,7 @@ static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_384) == 0U && r > 0U) { @@ -1964,7 +1913,6 @@ static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_384); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1973,10 +1921,9 @@ static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = 
(*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_512) == 0ULL && total_len > 0ULL) @@ -1993,6 +1940,7 @@ static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_512) == 0U && r > 0U) { @@ -2003,7 +1951,6 @@ static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_512); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -2012,10 +1959,9 @@ static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_Blake2S) == 0ULL && total_len > 0ULL) { @@ -2046,6 +1992,7 @@ static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_Blake2S) == 0U && r > 0U) { @@ -2056,7 +2003,6 @@ static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o ite = r % block_len(Spec_Hash_Definitions_Blake2S); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -2065,10 +2011,9 @@ static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o static void digest_blake2b(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_Blake2B) == 0ULL && total_len > 0ULL) { @@ -2099,6 +2044,7 @@ static void digest_blake2b(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_Blake2B) == 0U && r > 0U) { @@ -2109,7 +2055,6 @@ static void digest_blake2b(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o ite = r % block_len(Spec_Hash_Definitions_Blake2B); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; 
update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -2228,8 +2173,8 @@ void EverCrypt_Hash_Incremental_hash_256(uint8_t *output, uint8_t *input, uint32 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; uint32_t blocks_n0 = input_len / 64U; @@ -2266,8 +2211,8 @@ static void hash_224(uint8_t *output, uint8_t *input, uint32_t input_len) 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h224[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; uint32_t blocks_n0 = input_len / 64U; diff --git a/src/Hacl_AEAD_Chacha20Poly1305.c b/src/Hacl_AEAD_Chacha20Poly1305.c index d5926093..4b683308 100644 --- a/src/Hacl_AEAD_Chacha20Poly1305.c +++ b/src/Hacl_AEAD_Chacha20Poly1305.c @@ -579,7 +579,8 @@ Hacl_AEAD_Chacha20Poly1305_encrypt( { Hacl_Chacha20_chacha20_encrypt(input_len, output, input, key, nonce, 1U); uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_32(key1, data_len, data, input_len, output, tag); } @@ -618,7 +619,8 @@ Hacl_AEAD_Chacha20Poly1305_decrypt( { uint8_t computed_tag[16U] = { 0U }; uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_32(key1, data_len, data, input_len, input, computed_tag); uint8_t res = 255U; diff --git a/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c b/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c index 0cfa41fd..38494f80 100644 --- a/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c +++ b/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c @@ -1095,7 +1095,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd128_encrypt( { Hacl_Chacha20_Vec128_chacha20_encrypt_128(input_len, output, input, key, nonce, 1U); uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_128(key1, data_len, data, input_len, output, tag); } @@ -1134,7 +1135,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd128_decrypt( { uint8_t computed_tag[16U] = { 0U }; uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_128(key1, data_len, data, input_len, input, computed_tag); uint8_t res = 255U; diff --git a/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c b/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c index 28414516..edf44f38 100644 --- a/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c +++ b/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c @@ -1096,7 +1096,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd256_encrypt( { Hacl_Chacha20_Vec256_chacha20_encrypt_256(input_len, output, input, key, nonce, 1U); uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_256(key1, data_len, data, input_len, output, tag); } @@ -1135,7 +1136,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd256_decrypt( { uint8_t computed_tag[16U] = { 0U }; uint8_t tmp[64U] = { 0U }; - 
Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_256(key1, data_len, data, input_len, input, computed_tag); uint8_t res = 255U; diff --git a/src/Hacl_Bignum.c b/src/Hacl_Bignum.c index 568bcc26..e5fe7695 100644 --- a/src/Hacl_Bignum.c +++ b/src/Hacl_Bignum.c @@ -54,8 +54,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); for (uint32_t i = 0U; i < len2; i++) { - uint32_t *os = t0; uint32_t x = ((0U - c0) & t0[i]) | (~(0U - c0) & tmp_[i]); + uint32_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c10); @@ -64,8 +64,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, b1, b0, t1); for (uint32_t i = 0U; i < len2; i++) { - uint32_t *os = t1; uint32_t x = ((0U - c010) & t1[i]) | (~(0U - c010) & tmp_[i]); + uint32_t *os = t1; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -77,6 +77,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a0, b0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a1, b1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint32_t *r011 = res; uint32_t *r231 = res + aLen; uint32_t *t01 = tmp; @@ -92,37 +94,47 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t mask = 0U - c_sign; for (uint32_t i = 0U; i < aLen; i++) { - uint32_t *os = t45; uint32_t x = (mask & t45[i]) | (~mask & t67[i]); + uint32_t *os = t45; os[i] = x; } uint32_t c5 = (mask & c41) | (~mask & c31); uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r0 = res + aLen2; - uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); - uint32_t c6 = r10; + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint32_t)); + memcpy(a_copy, r0, aLen * sizeof (uint32_t)); + memcpy(b_copy, t45, aLen * sizeof (uint32_t)); + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, a_copy, b_copy, r0); + uint32_t r11 = r10; + uint32_t c6 = r11; uint32_t c60 = c6; uint32_t c7 = c5 + c60; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r = res + aLen + aLen2; uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32(0U, r[0U], c7, r); uint32_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint32_t *a11 = r + 1U; uint32_t *res1 = r + 1U; uint32_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint32_t t11 = a11[4U * i]; + uint32_t t11 = res1[4U * i]; uint32_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, 0U, res_i0); - uint32_t t110 = a11[4U * i + 1U]; + uint32_t t110 = res1[4U * i + 1U]; uint32_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t110, 0U, res_i1); - uint32_t t111 = a11[4U * i + 2U]; + uint32_t t111 = res1[4U * i + 2U]; uint32_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t111, 0U, res_i2); - uint32_t t112 = a11[4U * i + 3U]; + uint32_t t112 = res1[4U * i + 3U]; uint32_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t112, 0U, res_i); } @@ -133,7 +145,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint32_t t11 
= a11[i]; + uint32_t t11 = res1[i]; uint32_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, 0U, res_i); } @@ -176,8 +188,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); for (uint32_t i = 0U; i < len2; i++) { - uint64_t *os = t0; uint64_t x = ((0ULL - c0) & t0[i]) | (~(0ULL - c0) & tmp_[i]); + uint64_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c10); @@ -186,8 +198,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, b1, b0, t1); for (uint32_t i = 0U; i < len2; i++) { - uint64_t *os = t1; uint64_t x = ((0ULL - c010) & t1[i]) | (~(0ULL - c010) & tmp_[i]); + uint64_t *os = t1; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -199,6 +211,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a0, b0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a1, b1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint64_t *r011 = res; uint64_t *r231 = res + aLen; uint64_t *t01 = tmp; @@ -214,37 +228,47 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t mask = 0ULL - c_sign; for (uint32_t i = 0U; i < aLen; i++) { - uint64_t *os = t45; uint64_t x = (mask & t45[i]) | (~mask & t67[i]); + uint64_t *os = t45; os[i] = x; } uint64_t c5 = (mask & c41) | (~mask & c31); uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r0 = res + aLen2; - uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); - uint64_t c6 = r10; + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint64_t)); + memcpy(a_copy, r0, aLen * sizeof (uint64_t)); + memcpy(b_copy, t45, aLen * sizeof (uint64_t)); + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, a_copy, b_copy, r0); + uint64_t r11 = r10; + uint64_t c6 = r11; uint64_t c60 = c6; uint64_t c7 = c5 + c60; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r = res + aLen + aLen2; uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64(0ULL, r[0U], c7, r); uint64_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint64_t *a11 = r + 1U; uint64_t *res1 = r + 1U; uint64_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint64_t t11 = a11[4U * i]; + uint64_t t11 = res1[4U * i]; uint64_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i0); - uint64_t t110 = a11[4U * i + 1U]; + uint64_t t110 = res1[4U * i + 1U]; uint64_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t110, 0ULL, res_i1); - uint64_t t111 = a11[4U * i + 2U]; + uint64_t t111 = res1[4U * i + 2U]; uint64_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t111, 0ULL, res_i2); - uint64_t t112 = a11[4U * i + 3U]; + uint64_t t112 = res1[4U * i + 3U]; uint64_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t112, 0ULL, res_i); } @@ -255,7 +279,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint64_t t11 = a11[i]; + uint64_t t11 = res1[i]; uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i); } @@ -294,8 +318,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); for 
(uint32_t i = 0U; i < len2; i++) { - uint32_t *os = t0; uint32_t x = ((0U - c0) & t0[i]) | (~(0U - c0) & tmp_[i]); + uint32_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -308,6 +332,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( uint32_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint32_t *r011 = res; uint32_t *r231 = res + aLen; uint32_t *t01 = tmp; @@ -317,31 +343,41 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( uint32_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(aLen, t01, t231, t45); uint32_t c5 = c2 - c3; uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r0 = res + aLen2; - uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); - uint32_t c4 = r10; + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint32_t)); + memcpy(a_copy, r0, aLen * sizeof (uint32_t)); + memcpy(b_copy, t45, aLen * sizeof (uint32_t)); + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, a_copy, b_copy, r0); + uint32_t r11 = r10; + uint32_t c4 = r11; uint32_t c6 = c4; uint32_t c7 = c5 + c6; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r = res + aLen + aLen2; uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32(0U, r[0U], c7, r); uint32_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint32_t *a11 = r + 1U; uint32_t *res1 = r + 1U; uint32_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint32_t t1 = a11[4U * i]; + uint32_t t1 = res1[4U * i]; uint32_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, 0U, res_i0); - uint32_t t10 = a11[4U * i + 1U]; + uint32_t t10 = res1[4U * i + 1U]; uint32_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, 0U, res_i1); - uint32_t t11 = a11[4U * i + 2U]; + uint32_t t11 = res1[4U * i + 2U]; uint32_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, 0U, res_i2); - uint32_t t12 = a11[4U * i + 3U]; + uint32_t t12 = res1[4U * i + 3U]; uint32_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, 0U, res_i); } @@ -352,7 +388,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint32_t t1 = a11[i]; + uint32_t t1 = res1[i]; uint32_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, 0U, res_i); } @@ -391,8 +427,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); for (uint32_t i = 0U; i < len2; i++) { - uint64_t *os = t0; uint64_t x = ((0ULL - c0) & t0[i]) | (~(0ULL - c0) & tmp_[i]); + uint64_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -405,6 +441,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( uint64_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint64_t *r011 = res; uint64_t *r231 = res + aLen; uint64_t *t01 = tmp; @@ -414,31 +452,41 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( uint64_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(aLen, t01, t231, t45); uint64_t c5 = c2 - c3; uint32_t aLen2 = aLen / 2U; + 
KRML_MAYBE_UNUSED_VAR(res); uint64_t *r0 = res + aLen2; - uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); - uint64_t c4 = r10; + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint64_t)); + memcpy(a_copy, r0, aLen * sizeof (uint64_t)); + memcpy(b_copy, t45, aLen * sizeof (uint64_t)); + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, a_copy, b_copy, r0); + uint64_t r11 = r10; + uint64_t c4 = r11; uint64_t c6 = c4; uint64_t c7 = c5 + c6; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r = res + aLen + aLen2; uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64(0ULL, r[0U], c7, r); uint64_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint64_t *a11 = r + 1U; uint64_t *res1 = r + 1U; uint64_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint64_t t1 = a11[4U * i]; + uint64_t t1 = res1[4U * i]; uint64_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i0); - uint64_t t10 = a11[4U * i + 1U]; + uint64_t t10 = res1[4U * i + 1U]; uint64_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, 0ULL, res_i1); - uint64_t t11 = a11[4U * i + 2U]; + uint64_t t11 = res1[4U * i + 2U]; uint64_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i2); - uint64_t t12 = a11[4U * i + 3U]; + uint64_t t12 = res1[4U * i + 3U]; uint64_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, 0ULL, res_i); } @@ -449,7 +497,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint64_t t1 = a11[i]; + uint64_t t1 = res1[i]; uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i); } @@ -537,8 +585,8 @@ Hacl_Bignum_bn_add_mod_n_u32( uint32_t c2 = c00 - c1; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -614,8 +662,8 @@ Hacl_Bignum_bn_add_mod_n_u64( uint64_t c2 = c00 - c1; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -692,8 +740,8 @@ Hacl_Bignum_bn_sub_mod_n_u32( uint32_t c2 = 0U - c00; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = res; uint32_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -770,8 +818,8 @@ Hacl_Bignum_bn_sub_mod_n_u64( uint64_t c2 = 0ULL - c00; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -852,7 +900,15 @@ Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( res[i] = res[i] | 1U << j; for (uint32_t i0 = 0U; i0 < 64U * len - nBits; i0++) { - Hacl_Bignum_bn_add_mod_n_u32(len, n, res, res, res); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint32_t)); + memcpy(a_copy, res, len * sizeof (uint32_t)); + memcpy(b_copy, res, len * sizeof (uint32_t)); + Hacl_Bignum_bn_add_mod_n_u32(len, n, a_copy, b_copy, res); } } @@ -888,8 +944,8 @@ bn_mont_reduction_u32(uint32_t len, uint32_t *n, uint32_t nInv, uint32_t *c, uin } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + len + 
i0; uint32_t res_j = c[len + i0]; + uint32_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); @@ -928,8 +984,8 @@ bn_mont_reduction_u32(uint32_t len, uint32_t *n, uint32_t nInv, uint32_t *c, uin uint32_t c2 = c00 - c10; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -1043,7 +1099,15 @@ Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( res[i] = res[i] | 1ULL << j; for (uint32_t i0 = 0U; i0 < 128U * len - nBits; i0++) { - Hacl_Bignum_bn_add_mod_n_u64(len, n, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint64_t)); + memcpy(a_copy, res, len * sizeof (uint64_t)); + memcpy(b_copy, res, len * sizeof (uint64_t)); + Hacl_Bignum_bn_add_mod_n_u64(len, n, a_copy, b_copy, res); } } @@ -1079,8 +1143,8 @@ bn_mont_reduction_u64(uint32_t len, uint64_t *n, uint64_t nInv, uint64_t *c, uin } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + len + i0; uint64_t res_j = c[len + i0]; + uint64_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); @@ -1119,8 +1183,8 @@ bn_mont_reduction_u64(uint32_t len, uint64_t *n, uint64_t nInv, uint64_t *c, uin uint64_t c2 = c00 - c10; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -1238,8 +1302,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u32( } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + len + i0; uint32_t res_j = c[len + i0]; + uint32_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); @@ -1252,8 +1316,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u32( uint32_t m = 0U - c00; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = res; uint32_t x = (m & tmp[i]) | (~m & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -1335,8 +1399,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u64( } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + len + i0; uint64_t res_j = c[len + i0]; + uint64_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); @@ -1349,8 +1413,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u64( uint64_t m = 0ULL - c00; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = res; uint64_t x = (m & tmp[i]) | (~m & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -1489,9 +1553,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( memset(ctx, 0U, (len + len) * sizeof (uint32_t)); memcpy(ctx, n, len * sizeof (uint32_t)); memcpy(ctx + len, r2, len * sizeof (uint32_t)); - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -1500,11 +1565,21 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( uint32_t bit = tmp >> j & 1U; if (!(bit == 0U)) { - 
uint32_t *ctx_n0 = ctx; - bn_almost_mont_mul_u32(len, ctx_n0, mu, resM, aM, resM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint32_t *ctx_n0 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n0, mu, aM, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u32(len, n, mu, resM, res); return; @@ -1541,18 +1616,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy0, t11, len * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u32(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -1567,6 +1654,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint32_t), len); uint32_t tmp0[len]; @@ -1577,15 +1665,26 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_sqr_u32(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * len; memcpy(tmp0, (uint32_t *)a_bits_l, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u32(len, n, mu, resM, res); } @@ -1617,9 +1716,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( 
memcpy(ctx, n, len * sizeof (uint32_t)); memcpy(ctx + len, r2, len * sizeof (uint32_t)); uint32_t sw = 0U; - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -1633,10 +1733,20 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint32_t *ctx_n0 = ctx; - bn_almost_mont_mul_u32(len, ctx_n0, mu, aM, resM, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n1, mu, resM, resM); + bn_almost_mont_mul_u32(len, ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy0, resM, len * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint32_t sw0 = sw; @@ -1681,18 +1791,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy0, t11, len * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u32(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -1707,8 +1829,8 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( const uint32_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x; }); } @@ -1717,6 +1839,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint32_t), len); uint32_t tmp0[len]; @@ -1727,10 +1850,16 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_sqr_u32(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy, resM); + 
KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)(table + 0U * len), len * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -1740,12 +1869,17 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( const uint32_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x; }); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u32(len, n, mu, resM, res); } @@ -1883,9 +2017,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( memset(ctx, 0U, (len + len) * sizeof (uint64_t)); memcpy(ctx, n, len * sizeof (uint64_t)); memcpy(ctx + len, r2, len * sizeof (uint64_t)); - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -1894,11 +2029,21 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( uint64_t bit = tmp >> j & 1ULL; if (!(bit == 0ULL)) { - uint64_t *ctx_n0 = ctx; - bn_almost_mont_mul_u64(len, ctx_n0, mu, resM, aM, resM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint64_t *ctx_n0 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n0, mu, aM, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u64(len, n, mu, resM, res); return; @@ -1935,18 +2080,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy0, t11, len * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u64(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, 
t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -1961,6 +2118,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint64_t), len); uint64_t tmp0[len]; @@ -1971,15 +2129,26 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_sqr_u64(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * len; memcpy(tmp0, (uint64_t *)a_bits_l, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u64(len, n, mu, resM, res); } @@ -2011,9 +2180,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( memcpy(ctx, n, len * sizeof (uint64_t)); memcpy(ctx + len, r2, len * sizeof (uint64_t)); uint64_t sw = 0ULL; - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -2027,10 +2197,20 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint64_t *ctx_n0 = ctx; - bn_almost_mont_mul_u64(len, ctx_n0, mu, aM, resM, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n1, mu, resM, resM); + bn_almost_mont_mul_u64(len, ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy0, resM, len * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint64_t sw0 = sw; @@ -2075,18 +2255,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy0, t11, len * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - 
bn_almost_mont_sqr_u64(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u64(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -2101,8 +2293,8 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( const uint64_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x; }); } @@ -2111,6 +2303,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint64_t), len); uint64_t tmp0[len]; @@ -2121,10 +2314,16 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_sqr_u64(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)(table + 0U * len), len * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -2134,12 +2333,17 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( const uint64_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u64(len, n, mu, resM, res); } diff --git a/src/Hacl_Bignum256.c b/src/Hacl_Bignum256.c index 54bbc88a..2e305aa5 100644 --- a/src/Hacl_Bignum256.c +++ b/src/Hacl_Bignum256.c @@ -171,8 +171,8 @@ void Hacl_Bignum256_add_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x;); } @@ -235,8 +235,8 @@ void Hacl_Bignum256_sub_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x;); } @@ -287,8 +287,8 @@ void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) 0U, 4U, 1U, - uint64_t *ab = a; uint64_t a_j = a[i0]; + uint64_t *ab = a; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -314,7 +314,12 @@ void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) } uint64_t r = c; res[i0 + i0] = r;); - uint64_t c0 = 
Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, res, res); + uint64_t a_copy0[8U] = { 0U }; + uint64_t b_copy0[8U] = { 0U }; + memcpy(a_copy0, res, 8U * sizeof (uint64_t)); + memcpy(b_copy0, res, 8U * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint64_t tmp[8U] = { 0U }; KRML_MAYBE_FOR4(i, @@ -326,7 +331,12 @@ void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, tmp, res); + uint64_t a_copy[8U] = { 0U }; + uint64_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint64_t)); + memcpy(b_copy, tmp, 8U * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -338,7 +348,11 @@ static inline void precompr2(uint32_t nBits, uint64_t *n, uint64_t *res) res[i] = res[i] | 1ULL << j; for (uint32_t i0 = 0U; i0 < 512U - nBits; i0++) { - Hacl_Bignum256_add_mod(n, res, res, res); + uint64_t a_copy[4U] = { 0U }; + uint64_t b_copy[4U] = { 0U }; + memcpy(a_copy, res, 4U * sizeof (uint64_t)); + memcpy(b_copy, res, 4U * sizeof (uint64_t)); + Hacl_Bignum256_add_mod(n, a_copy, b_copy, res); } } @@ -368,8 +382,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 4U + i0; uint64_t res_j = c[4U + i0]; + uint64_t *resb = c + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb);); memcpy(res, c + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = c0; @@ -399,8 +413,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x;); } @@ -444,8 +458,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 4U + i0; uint64_t res_j = c[4U + i0]; + uint64_t *resb = c + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb);); memcpy(res, c + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = c0; @@ -457,8 +471,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (m & tmp[i]) | (~m & res[i]); + uint64_t *os = res; os[i] = x;); } @@ -611,9 +625,10 @@ exp_vartime_precomp( uint64_t ctx[8U] = { 0U }; memcpy(ctx, n, 4U * sizeof (uint64_t)); memcpy(ctx + 4U, r2, 4U * sizeof (uint64_t)); - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 4U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -622,11 +637,17 @@ exp_vartime_precomp( uint64_t bit = tmp >> j & 1ULL; if (!(bit == 0ULL)) { - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint64_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -653,18 +674,26 @@ 
exp_vartime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 4U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 4U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 4U; + uint64_t aM_copy0[4U] = { 0U }; + memcpy(aM_copy0, t11, 4U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 4U, tmp, 4U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 4U; + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 4U, tmp, 4U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -679,6 +708,7 @@ exp_vartime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 4U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[4U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -687,15 +717,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * 4U; memcpy(tmp0, (uint64_t *)a_bits_l, 4U * sizeof (uint64_t)); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -720,9 +757,10 @@ exp_consttime_precomp( memcpy(ctx, n, 4U * sizeof (uint64_t)); memcpy(ctx + 4U, r2, 4U * sizeof (uint64_t)); uint64_t sw = 0ULL; - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 4U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -737,10 +775,16 @@ exp_consttime_precomp( uint64_t dummy = (0ULL - sw1) & (resM[i] ^ aM[i]); resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy;); - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint64_t aM_copy0[4U] = { 0U }; + memcpy(aM_copy0, resM, 4U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint64_t sw0 = sw; @@ -776,18 +820,26 @@ exp_consttime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 4U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 4U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 4U; + uint64_t aM_copy0[4U] = { 0U }; + memcpy(aM_copy0, t11, 4U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 4U, tmp, 4U * sizeof 
(uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 4U; + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 4U, tmp, 4U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -804,8 +856,8 @@ exp_consttime_precomp( 0U, 4U, 1U, - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x;);); } else @@ -813,6 +865,7 @@ exp_consttime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 4U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[4U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -821,10 +874,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 4U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -836,11 +893,14 @@ exp_consttime_precomp( 0U, 4U, 1U, - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x;);); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -1087,9 +1147,9 @@ Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. */ void Hacl_Bignum256_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1109,8 +1169,10 @@ Hacl_Bignum256_mod_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1141,8 +1203,10 @@ Hacl_Bignum256_mod_exp_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1173,8 +1237,10 @@ Hacl_Bignum256_mod_exp_consttime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1196,10 +1262,12 @@ Hacl_Bignum256_mod_inv_prime_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; uint64_t n2[4U] = { 0U }; - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); - uint64_t *a1 = k1.n + 1U; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, n[0U], 2ULL, n2); + uint64_t *a1 = n + 1U; uint64_t 
*res1 = n2 + 1U; uint64_t c = c0; KRML_MAYBE_FOR3(i, @@ -1212,7 +1280,7 @@ Hacl_Bignum256_mod_inv_prime_vartime_precomp( uint64_t c1 = c; uint64_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 256U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 256U, n2, res); } @@ -1254,9 +1322,9 @@ uint64_t *Hacl_Bignum256_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res2; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res2; os[i] = x; } return res2; @@ -1295,11 +1363,11 @@ uint64_t *Hacl_Bignum256_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 8U + 1U; i++) { - uint64_t *os = res2; uint8_t *bj = tmp + i * 8U; uint64_t u = load64_le(bj); uint64_t r1 = u; uint64_t x = r1; + uint64_t *os = res2; os[i] = x; } return res2; diff --git a/src/Hacl_Bignum256_32.c b/src/Hacl_Bignum256_32.c index eed6c65c..ff8cfe9d 100644 --- a/src/Hacl_Bignum256_32.c +++ b/src/Hacl_Bignum256_32.c @@ -179,8 +179,8 @@ void Hacl_Bignum256_32_add_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t * 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x;); } @@ -247,8 +247,8 @@ void Hacl_Bignum256_32_sub_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t * 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint32_t *os = res; os[i] = x;); } @@ -301,8 +301,8 @@ void Hacl_Bignum256_32_sqr(uint32_t *a, uint32_t *res) 0U, 8U, 1U, - uint32_t *ab = a; uint32_t a_j = a[i0]; + uint32_t *ab = a; uint32_t *res_j = res + i0; uint32_t c = 0U; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -328,7 +328,12 @@ void Hacl_Bignum256_32_sqr(uint32_t *a, uint32_t *res) } uint32_t r = c; res[i0 + i0] = r;); - uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, res, res, res); + uint32_t a_copy0[16U] = { 0U }; + uint32_t b_copy0[16U] = { 0U }; + memcpy(a_copy0, res, 16U * sizeof (uint32_t)); + memcpy(b_copy0, res, 16U * sizeof (uint32_t)); + uint32_t r = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, a_copy0, b_copy0, res); + uint32_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint32_t tmp[16U] = { 0U }; KRML_MAYBE_FOR8(i, @@ -340,7 +345,12 @@ void Hacl_Bignum256_32_sqr(uint32_t *a, uint32_t *res) uint32_t lo = (uint32_t)res1; tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, res, tmp, res); + uint32_t a_copy[16U] = { 0U }; + uint32_t b_copy[16U] = { 0U }; + memcpy(a_copy, res, 16U * sizeof (uint32_t)); + memcpy(b_copy, tmp, 16U * sizeof (uint32_t)); + uint32_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, a_copy, b_copy, res); + uint32_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -352,7 +362,11 @@ static inline void precompr2(uint32_t nBits, uint32_t *n, uint32_t *res) res[i] = res[i] | 1U << j; for (uint32_t i0 = 0U; i0 < 512U - nBits; i0++) { - Hacl_Bignum256_32_add_mod(n, res, res, res); + uint32_t a_copy[8U] = { 0U }; + uint32_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint32_t)); + memcpy(b_copy, res, 8U * sizeof (uint32_t)); + Hacl_Bignum256_32_add_mod(n, a_copy, b_copy, res); } } @@ -384,8 +398,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i);); uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 8U + i0; uint32_t res_j = c[8U + i0]; + 
uint32_t *resb = c + 8U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb);); memcpy(res, c + 8U, 8U * sizeof (uint32_t)); uint32_t c00 = c0; @@ -417,8 +431,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x;); } @@ -464,8 +478,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i);); uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 8U + i0; uint32_t res_j = c[8U + i0]; + uint32_t *resb = c + 8U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb);); memcpy(res, c + 8U, 8U * sizeof (uint32_t)); uint32_t c00 = c0; @@ -477,8 +491,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (m & tmp[i]) | (~m & res[i]); + uint32_t *os = res; os[i] = x;); } @@ -631,9 +645,10 @@ exp_vartime_precomp( uint32_t ctx[16U] = { 0U }; memcpy(ctx, n, 8U * sizeof (uint32_t)); memcpy(ctx + 8U, r2, 8U * sizeof (uint32_t)); - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 8U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -642,11 +657,17 @@ exp_vartime_precomp( uint32_t bit = tmp >> j & 1U; if (!(bit == 0U)) { - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint32_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -673,18 +694,26 @@ exp_vartime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 8U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 8U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 8U; + uint32_t aM_copy0[8U] = { 0U }; + memcpy(aM_copy0, t11, 8U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 8U, tmp, 8U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 8U; + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 8U, tmp, 8U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -699,6 +728,7 @@ exp_vartime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 8U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[8U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -707,15 +737,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + 
KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * 8U; memcpy(tmp0, (uint32_t *)a_bits_l, 8U * sizeof (uint32_t)); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -740,9 +777,10 @@ exp_consttime_precomp( memcpy(ctx, n, 8U * sizeof (uint32_t)); memcpy(ctx + 8U, r2, 8U * sizeof (uint32_t)); uint32_t sw = 0U; - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 8U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -757,10 +795,16 @@ exp_consttime_precomp( uint32_t dummy = (0U - sw1) & (resM[i] ^ aM[i]); resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy;); - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint32_t aM_copy0[8U] = { 0U }; + memcpy(aM_copy0, resM, 8U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint32_t sw0 = sw; @@ -796,18 +840,26 @@ exp_consttime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 8U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 8U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 8U; + uint32_t aM_copy0[8U] = { 0U }; + memcpy(aM_copy0, t11, 8U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 8U, tmp, 8U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 8U; + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 8U, tmp, 8U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -824,8 +876,8 @@ exp_consttime_precomp( 0U, 8U, 1U, - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x;);); } else @@ -833,6 +885,7 @@ exp_consttime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 8U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[8U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -841,10 +894,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)table, 8U * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -856,11 +913,14 @@ exp_consttime_precomp( 0U, 8U, 1U, - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x;);); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = 
ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -1121,9 +1181,9 @@ Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. */ void Hacl_Bignum256_32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1143,8 +1203,10 @@ Hacl_Bignum256_32_mod_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1175,8 +1237,10 @@ Hacl_Bignum256_32_mod_exp_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1207,8 +1271,10 @@ Hacl_Bignum256_32_mod_exp_consttime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1230,10 +1296,12 @@ Hacl_Bignum256_32_mod_inv_prime_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; uint32_t n2[8U] = { 0U }; - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); - uint32_t *a1 = k1.n + 1U; + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, n[0U], 2U, n2); + uint32_t *a1 = n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; { @@ -1260,7 +1328,7 @@ Hacl_Bignum256_32_mod_inv_prime_vartime_precomp( uint32_t c1 = c; uint32_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 256U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 256U, n2, res); } @@ -1302,9 +1370,9 @@ uint32_t *Hacl_Bignum256_32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint32_t *os = res2; uint32_t u = load32_be(tmp + (bnLen - i - 1U) * 4U); uint32_t x = u; + uint32_t *os = res2; os[i] = x; } return res2; @@ -1343,11 +1411,11 @@ uint32_t *Hacl_Bignum256_32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 4U + 1U; i++) { - uint32_t *os = res2; uint8_t *bj = tmp + i * 4U; uint32_t u = load32_le(bj); uint32_t r1 = u; uint32_t x = r1; + uint32_t *os = res2; os[i] = x; } return res2; diff --git a/src/Hacl_Bignum32.c b/src/Hacl_Bignum32.c index 34b46324..13890aec 100644 --- a/src/Hacl_Bignum32.c +++ b/src/Hacl_Bignum32.c @@ -46,9 +46,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -60,7 +69,16 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -70,27 +88,57 @@ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { - Hacl_Bignum_bn_add_mod_n_u32(len, n, a, b, res); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint32_t)); + memcpy(a_copy, a, len * sizeof (uint32_t)); + memcpy(b_copy, b, len * sizeof (uint32_t)); + Hacl_Bignum_bn_add_mod_n_u32(len, n, a_copy, b_copy, res); } /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. 
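As a usage sketch for these prototypes (hypothetical 8-limb, i.e. 256-bit, operands and an arbitrary modulus; the helper name is illustrative only, not part of the generated API):

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: 256-bit operands as 8 little-endian 32-bit limbs (least significant
   limb first); the values are arbitrary placeholders. */
void example_bignum32_add(uint32_t n[8U] /* any modulus with a1, b1 < n */)
{
  uint32_t a[8U] = {
    0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU,
    0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU
  };
  uint32_t b[8U] = { 1U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
  uint32_t res[8U] = { 0U };
  /* res = (a + b) mod 2^256; the carry out of the top limb is returned. */
  uint32_t carry = Hacl_Bignum32_add(8U, a, b, res);
  /* Here a == 2^256 - 1, so res is all zeroes and carry == 1. */
  (void)carry;

  uint32_t a1[8U] = { 5U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
  uint32_t b1[8U] = { 7U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
  uint32_t sum[8U] = { 0U };
  Hacl_Bignum32_add_mod(8U, n, a1, b1, sum); /* requires a1 < n and b1 < n */
}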
- - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -100,8 +148,13 @@ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -114,8 +167,10 @@ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res) { @@ -149,13 +204,19 @@ bn_slow_precomp( /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. 
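A minimal sketch of this reduction entry point, assuming an 8-limb (256-bit) odd modulus and a 16-limb double-width input; the wrapper name is illustrative only:

#include <stdbool.h>
#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: reduce a double-width (2*len limbs) value modulo an odd n > 1. */
bool example_bignum32_mod(uint32_t n[8U], uint32_t a[16U], uint32_t res[8U])
{
  /* Returns false if the preconditions on n are not met. */
  return Hacl_Bignum32_mod(8U, n, a, res);
}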
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -195,22 +256,30 @@ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -238,22 +307,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. 
+ @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -281,18 +358,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -393,15 +475,16 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n - - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n) @@ -429,13 +512,13 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. 
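For context, a sketch of the init/use/free lifecycle these two functions bracket, assuming an 8-limb odd modulus; the parameter order of the precomp exponentiation follows the definitions later in this file, and the wrapper name is illustrative only:

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: reuse one precomputed Montgomery context for several operations.
   n must be odd and > 1; base and exponent bounds are as in the @pre blocks. */
void example_ctx_lifecycle(uint32_t *n /* 8 limbs */,
                           uint32_t *a /* 8 limbs, a < n */,
                           uint32_t *b /* exponent limbs */, uint32_t bBits,
                           uint32_t *res /* 8 limbs */)
{
  Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32
  *k = Hacl_Bignum32_mont_ctx_init(8U, n);
  Hacl_Bignum32_mod_exp_vartime_precomp(k, a, bBits, b, res);
  Hacl_Bignum32_mont_ctx_free(k); /* releases n, r2 and the context itself */
}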
*/ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -444,9 +527,11 @@ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. */ void Hacl_Bignum32_mod_precomp( @@ -455,30 +540,35 @@ Hacl_Bignum32_mod_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - bn_slow_precomp(len1, k1.n, k1.mu, k1.r2, a, res); + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + bn_slow_precomp(len1, n, mu, r2, a, res); } /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. 
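To illustrate the `bBits` guidance: a fixed exponent such as 65537 fits in 17 bits, so passing a tight bound is both valid (65537 < 2^17) and faster than passing the full bignum width. A sketch with a context `k` assumed to be already initialized and an illustrative wrapper name:

/* Sketch: tight bBits for the common public exponent 65537. */
void example_exp_65537(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k,
                       uint32_t *a /* len limbs, a < n */,
                       uint32_t *res /* len limbs */)
{
  uint32_t e[1U] = { 65537U };
  Hacl_Bignum32_mod_exp_vartime_precomp(k, a, 17U, e, res);
}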
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -489,37 +579,35 @@ Hacl_Bignum32_mod_exp_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len1, n, mu, r2, a, bBits, b, res); } /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -530,30 +618,27 @@ Hacl_Bignum32_mod_exp_consttime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(len1, n, mu, r2, a, bBits, b, res); } /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. 
+ @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -562,17 +647,18 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint32_t)); - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, n[0U], 2U, n2); uint32_t c1; if (1U < len1) { - uint32_t *a1 = k1.n + 1U; + uint32_t *a1 = n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -605,9 +691,9 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( } KRML_MAYBE_UNUSED_VAR(c1); Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len1, - k1.n, - k1.mu, - k1.r2, + n, + mu, + r2, a, 32U * len1, n2, @@ -623,13 +709,13 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) { @@ -653,9 +739,9 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint32_t *os = res2; uint32_t u = load32_be(tmp + (bnLen - i - 1U) * 4U); uint32_t x = u; + uint32_t *os = res2; os[i] = x; } return res2; @@ -664,13 +750,13 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. 
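A round-trip sketch for these loaders and the serializers further below, assuming a hypothetical 32-byte big-endian encoding; the wrapper name is illustrative only:

#include <stdint.h>
#include <stdlib.h>
#include "Hacl_Bignum32.h"

/* Sketch: bytes -> heap-allocated bignum -> bytes; the caller frees the bignum. */
int example_roundtrip_be(uint8_t in[32U], uint8_t out[32U])
{
  uint32_t *bn = Hacl_Bignum32_new_bn_from_bytes_be(32U, in);
  if (bn == NULL)
    return -1; /* allocation failed or the size limit was exceeded */
  Hacl_Bignum32_bn_to_bytes_be(32U, bn, out); /* bn holds ceil(32 / 4) = 8 limbs */
  free(bn);
  return 0;
}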
Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) { @@ -694,11 +780,11 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 4U + 1U; i++) { - uint32_t *os = res2; uint8_t *bj = tmp + i * 4U; uint32_t u = load32_le(bj); uint32_t r1 = u; uint32_t x = r1; + uint32_t *os = res2; os[i] = x; } return res2; @@ -707,8 +793,11 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) { @@ -727,8 +816,11 @@ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) { @@ -753,7 +845,11 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) { @@ -770,7 +866,11 @@ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if a = b, otherwise, `0`. 
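The all-ones/all-zeroes return convention of `lt_mask`/`eq_mask` lends itself to branchless selection, in the same `(mask & x) | (~mask & y)` style used throughout this file; a sketch with an illustrative wrapper name:

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Sketch: res = (a < b) ? x : y, limb by limb, without a data-dependent branch. */
void example_select_lt(uint32_t len, uint32_t *a, uint32_t *b,
                       uint32_t *x, uint32_t *y, uint32_t *res)
{
  uint32_t mask = Hacl_Bignum32_lt_mask(len, a, b); /* 2^32 - 1 or 0 */
  for (uint32_t i = 0U; i < len; i++)
  {
    res[i] = (mask & x[i]) | (~mask & y[i]);
  }
}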
*/ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b) { diff --git a/src/Hacl_Bignum4096.c b/src/Hacl_Bignum4096.c index 3572db07..5f674b3c 100644 --- a/src/Hacl_Bignum4096.c +++ b/src/Hacl_Bignum4096.c @@ -180,8 +180,8 @@ void Hacl_Bignum4096_add_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *re uint64_t c2 = c00 - c1; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -247,8 +247,8 @@ void Hacl_Bignum4096_sub_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *re uint64_t c2 = 0ULL - c00; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -285,7 +285,11 @@ static inline void precompr2(uint32_t nBits, uint64_t *n, uint64_t *res) res[i] = res[i] | 1ULL << j; for (uint32_t i0 = 0U; i0 < 8192U - nBits; i0++) { - Hacl_Bignum4096_add_mod(n, res, res, res); + uint64_t a_copy[64U] = { 0U }; + uint64_t b_copy[64U] = { 0U }; + memcpy(a_copy, res, 64U * sizeof (uint64_t)); + memcpy(b_copy, res, 64U * sizeof (uint64_t)); + Hacl_Bignum4096_add_mod(n, a_copy, b_copy, res); } } @@ -315,8 +319,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i);); uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 64U + i0; uint64_t res_j = c[64U + i0]; + uint64_t *resb = c + 64U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + 64U, 64U * sizeof (uint64_t)); @@ -347,8 +351,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * uint64_t c2 = c00 - c10; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -393,8 +397,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i);); uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 64U + i0; uint64_t res_j = c[64U + i0]; + uint64_t *resb = c + 64U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + 64U, 64U * sizeof (uint64_t)); @@ -405,8 +409,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t uint64_t m = 0ULL - c00; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (m & tmp[i]) | (~m & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -557,9 +561,10 @@ exp_vartime_precomp( uint64_t ctx[128U] = { 0U }; memcpy(ctx, n, 64U * sizeof (uint64_t)); memcpy(ctx + 64U, r2, 64U * sizeof (uint64_t)); - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 64U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -568,11 +573,17 @@ exp_vartime_precomp( uint64_t bit = tmp >> j & 1ULL; if (!(bit == 0ULL)) { - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint64_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, 
res); return; @@ -599,18 +610,26 @@ exp_vartime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 64U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 64U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 64U; + uint64_t aM_copy0[64U] = { 0U }; + memcpy(aM_copy0, t11, 64U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 64U, tmp, 64U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 64U; + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 64U, tmp, 64U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -625,6 +644,7 @@ exp_vartime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 64U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[64U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -633,15 +653,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * 64U; memcpy(tmp0, (uint64_t *)a_bits_l, 64U * sizeof (uint64_t)); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -666,9 +693,10 @@ exp_consttime_precomp( memcpy(ctx, n, 64U * sizeof (uint64_t)); memcpy(ctx + 64U, r2, 64U * sizeof (uint64_t)); uint64_t sw = 0ULL; - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 64U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -682,10 +710,16 @@ exp_consttime_precomp( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint64_t aM_copy0[64U] = { 0U }; + memcpy(aM_copy0, resM, 64U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint64_t sw0 = sw; @@ -720,18 +754,26 @@ exp_consttime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 64U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 64U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 64U; + uint64_t aM_copy0[64U] = { 0U }; + memcpy(aM_copy0, t11, 64U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 
64U, tmp, 64U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 64U; + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 64U, tmp, 64U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -746,8 +788,8 @@ exp_consttime_precomp( const uint64_t *res_j = table + (i1 + 1U) * 64U; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x; }); } @@ -756,6 +798,7 @@ exp_consttime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 64U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[64U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -764,10 +807,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 64U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -777,12 +824,15 @@ exp_consttime_precomp( const uint64_t *res_j = table + (i1 + 1U) * 64U; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -1042,9 +1092,9 @@ Deallocate the memory previously allocated by Hacl_Bignum4096_mont_ctx_init. 
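Unlike the length-generic `Hacl_Bignum32`/`Hacl_Bignum64` entry points, the 4096-bit variants are fixed-width: every operand is `uint64_t[64]` and no `len` argument is passed. A sketch, assuming the matching `Hacl_Bignum4096.h` header, an illustrative wrapper name, and operands with a < n and b < n:

#include <stdint.h>
#include "Hacl_Bignum4096.h"

/* Sketch: fixed-width modular addition on 64 x 64-bit limbs (4096 bits). */
void example_add_mod_4096(uint64_t n[64U], uint64_t a[64U], uint64_t b[64U],
                          uint64_t res[64U])
{
  Hacl_Bignum4096_add_mod(n, a, b, res); /* res = (a + b) mod n */
}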
*/ void Hacl_Bignum4096_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1064,8 +1114,10 @@ Hacl_Bignum4096_mod_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1096,8 +1148,10 @@ Hacl_Bignum4096_mod_exp_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1128,8 +1182,10 @@ Hacl_Bignum4096_mod_exp_consttime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1151,10 +1207,12 @@ Hacl_Bignum4096_mod_inv_prime_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; uint64_t n2[64U] = { 0U }; - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); - uint64_t *a1 = k1.n + 1U; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, n[0U], 2ULL, n2); + uint64_t *a1 = n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; KRML_MAYBE_FOR15(i, @@ -1183,7 +1241,7 @@ Hacl_Bignum4096_mod_inv_prime_vartime_precomp( uint64_t c1 = c; uint64_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 4096U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 4096U, n2, res); } @@ -1225,9 +1283,9 @@ uint64_t *Hacl_Bignum4096_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res2; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res2; os[i] = x; } return res2; @@ -1266,11 +1324,11 @@ uint64_t *Hacl_Bignum4096_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 8U + 1U; i++) { - uint64_t *os = res2; uint8_t *bj = tmp + i * 8U; uint64_t u = load64_le(bj); uint64_t r1 = u; uint64_t x = r1; + uint64_t *os = res2; os[i] = x; } return res2; diff --git a/src/Hacl_Bignum4096_32.c b/src/Hacl_Bignum4096_32.c index 1a8b361c..97b98354 100644 --- a/src/Hacl_Bignum4096_32.c +++ b/src/Hacl_Bignum4096_32.c @@ -177,8 +177,8 @@ void Hacl_Bignum4096_32_add_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t uint32_t c2 = c00 - c1; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -242,8 +242,8 @@ void Hacl_Bignum4096_32_sub_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t uint32_t c2 = 0U - c00; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -280,7 +280,11 @@ static inline void precompr2(uint32_t nBits, uint32_t *n, uint32_t 
*res) res[i] = res[i] | 1U << j; for (uint32_t i0 = 0U; i0 < 8192U - nBits; i0++) { - Hacl_Bignum4096_32_add_mod(n, res, res, res); + uint32_t a_copy[128U] = { 0U }; + uint32_t b_copy[128U] = { 0U }; + memcpy(a_copy, res, 128U * sizeof (uint32_t)); + memcpy(b_copy, res, 128U * sizeof (uint32_t)); + Hacl_Bignum4096_32_add_mod(n, a_copy, b_copy, res); } } @@ -309,8 +313,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 128U + i0; uint32_t res_j = c[128U + i0]; + uint32_t *resb = c + 128U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + 128U, 128U * sizeof (uint32_t)); @@ -340,8 +344,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * uint32_t c2 = c00 - c10; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -385,8 +389,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 128U + i0; uint32_t res_j = c[128U + i0]; + uint32_t *resb = c + 128U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + 128U, 128U * sizeof (uint32_t)); @@ -397,8 +401,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t uint32_t m = 0U - c00; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (m & tmp[i]) | (~m & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -549,9 +553,10 @@ exp_vartime_precomp( uint32_t ctx[256U] = { 0U }; memcpy(ctx, n, 128U * sizeof (uint32_t)); memcpy(ctx + 128U, r2, 128U * sizeof (uint32_t)); - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 128U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -560,11 +565,17 @@ exp_vartime_precomp( uint32_t bit = tmp >> j & 1U; if (!(bit == 0U)) { - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint32_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -591,18 +602,26 @@ exp_vartime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 128U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 128U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 128U; + uint32_t aM_copy0[128U] = { 0U }; + memcpy(aM_copy0, t11, 128U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 128U, tmp, 128U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 128U; + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 128U, tmp, 128U * sizeof (uint32_t));); if (bBits % 4U 
!= 0U) { @@ -617,6 +636,7 @@ exp_vartime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 128U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[128U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -625,15 +645,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * 128U; memcpy(tmp0, (uint32_t *)a_bits_l, 128U * sizeof (uint32_t)); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -658,9 +685,10 @@ exp_consttime_precomp( memcpy(ctx, n, 128U * sizeof (uint32_t)); memcpy(ctx + 128U, r2, 128U * sizeof (uint32_t)); uint32_t sw = 0U; - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 128U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -674,10 +702,16 @@ exp_consttime_precomp( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint32_t aM_copy0[128U] = { 0U }; + memcpy(aM_copy0, resM, 128U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint32_t sw0 = sw; @@ -712,18 +746,26 @@ exp_consttime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 128U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 128U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 128U; + uint32_t aM_copy0[128U] = { 0U }; + memcpy(aM_copy0, t11, 128U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 128U, tmp, 128U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 128U; + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 128U, tmp, 128U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -738,8 +780,8 @@ exp_consttime_precomp( const uint32_t *res_j = table + (i1 + 1U) * 128U; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x; }); } @@ -748,6 +790,7 @@ exp_consttime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 128U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[128U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -756,10 +799,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + 
uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)table, 128U * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -769,12 +816,15 @@ exp_consttime_precomp( const uint32_t *res_j = table + (i1 + 1U) * 128U; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x; }); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -1034,9 +1084,9 @@ Deallocate the memory previously allocated by Hacl_Bignum4096_mont_ctx_init. */ void Hacl_Bignum4096_32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1056,8 +1106,10 @@ Hacl_Bignum4096_32_mod_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1088,8 +1140,10 @@ Hacl_Bignum4096_32_mod_exp_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1120,8 +1174,10 @@ Hacl_Bignum4096_32_mod_exp_consttime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1143,10 +1199,12 @@ Hacl_Bignum4096_32_mod_inv_prime_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; uint32_t n2[128U] = { 0U }; - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); - uint32_t *a1 = k1.n + 1U; + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, n[0U], 2U, n2); + uint32_t *a1 = n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; for (uint32_t i = 0U; i < 31U; i++) @@ -1174,7 +1232,7 @@ Hacl_Bignum4096_32_mod_inv_prime_vartime_precomp( uint32_t c1 = c; uint32_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 4096U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 4096U, n2, res); } @@ -1216,9 +1274,9 @@ uint32_t *Hacl_Bignum4096_32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint32_t *os = res2; uint32_t u = load32_be(tmp + (bnLen - i - 1U) * 4U); uint32_t x = u; + uint32_t *os = res2; os[i] = x; } return res2; @@ -1257,11 +1315,11 @@ uint32_t 
*Hacl_Bignum4096_32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 4U + 1U; i++) { - uint32_t *os = res2; uint8_t *bj = tmp + i * 4U; uint32_t u = load32_le(bj); uint32_t r1 = u; uint32_t x = r1; + uint32_t *os = res2; os[i] = x; } return res2; diff --git a/src/Hacl_Bignum64.c b/src/Hacl_Bignum64.c index f8f5bb6f..c1240c15 100644 --- a/src/Hacl_Bignum64.c +++ b/src/Hacl_Bignum64.c @@ -78,7 +78,15 @@ Write `(a + b) mod n` in `res`. */ void Hacl_Bignum64_add_mod(uint32_t len, uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) { - Hacl_Bignum_bn_add_mod_n_u64(len, n, a, b, res); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint64_t)); + memcpy(a_copy, a, len * sizeof (uint64_t)); + memcpy(b_copy, b, len * sizeof (uint64_t)); + Hacl_Bignum_bn_add_mod_n_u64(len, n, a_copy, b_copy, res); } /** @@ -432,9 +440,9 @@ Deallocate the memory previously allocated by Hacl_Bignum64_mont_ctx_init. */ void Hacl_Bignum64_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -454,10 +462,11 @@ Hacl_Bignum64_mod_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - bn_slow_precomp(len1, k1.n, k1.mu, k1.r2, a, res); + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + bn_slow_precomp(len1, n, mu, r2, a, res); } /** @@ -488,17 +497,11 @@ Hacl_Bignum64_mod_exp_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len1, n, mu, r2, a, bBits, b, res); } /** @@ -529,17 +532,11 @@ Hacl_Bignum64_mod_exp_consttime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(len1, n, mu, r2, a, bBits, b, res); } /** @@ -561,17 +558,18 @@ Hacl_Bignum64_mod_inv_prime_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint64_t)); - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); + uint64_t c0 = 
Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, n[0U], 2ULL, n2); uint64_t c1; if (1U < len1) { - uint64_t *a1 = k1.n + 1U; + uint64_t *a1 = n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -604,9 +602,9 @@ Hacl_Bignum64_mod_inv_prime_vartime_precomp( } KRML_MAYBE_UNUSED_VAR(c1); Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len1, - k1.n, - k1.mu, - k1.r2, + n, + mu, + r2, a, 64U * len1, n2, @@ -652,9 +650,9 @@ uint64_t *Hacl_Bignum64_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res2; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res2; os[i] = x; } return res2; @@ -693,11 +691,11 @@ uint64_t *Hacl_Bignum64_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 8U + 1U; i++) { - uint64_t *os = res2; uint8_t *bj = tmp + i * 8U; uint64_t u = load64_le(bj); uint64_t r1 = u; uint64_t x = r1; + uint64_t *os = res2; os[i] = x; } return res2; diff --git a/src/Hacl_Chacha20.c b/src/Hacl_Chacha20.c index 38a5c373..cc5b5fb4 100644 --- a/src/Hacl_Chacha20.c +++ b/src/Hacl_Chacha20.c @@ -102,45 +102,43 @@ static inline void chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) 0U, 16U, 1U, - uint32_t *os = k; uint32_t x = k[i] + ctx[i]; + uint32_t *os = k; os[i] = x;); k[12U] = k[12U] + ctr_u32; } -static const -uint32_t -chacha20_constants[4U] = { 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U }; - void Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) { KRML_MAYBE_FOR4(i, 0U, 4U, 1U, + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; uint32_t *os = ctx; - uint32_t x = chacha20_constants[i]; os[i] = x;); + uint32_t *uu____0 = ctx + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx[12U] = ctr; + uint32_t *uu____1 = ctx + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); } @@ -153,18 +151,18 @@ static void chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, u 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = text + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(out + i * 4U, bl[i]);); } @@ -174,7 +172,9 @@ chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, { uint8_t plain[64U] = { 0U }; memcpy(plain, text, len * sizeof (uint8_t)); - chacha20_encrypt_block(ctx, plain, incr, plain); + uint8_t plain_copy[64U] = { 0U }; + memcpy(plain_copy, plain, 64U * sizeof (uint8_t)); + chacha20_encrypt_block(ctx, plain, incr, plain_copy); memcpy(out, plain, len * sizeof (uint8_t)); } diff --git a/src/Hacl_Chacha20_Vec128.c b/src/Hacl_Chacha20_Vec128.c index deab1dfc..1c49e409 100644 --- a/src/Hacl_Chacha20_Vec128.c +++ b/src/Hacl_Chacha20_Vec128.c @@ -153,8 +153,8 @@ chacha20_core_128( 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = k; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); + 
Lib_IntVector_Intrinsics_vec128 *os = k; os[i] = x;); k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); } @@ -167,37 +167,39 @@ chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, 0U, 4U, 1U, - uint32_t *os = ctx1; uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + uint32_t *os = ctx1; os[i] = x;); + uint32_t *uu____0 = ctx1 + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx1 + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx1[12U] = ctr; + uint32_t *uu____1 = ctx1 + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx1 + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = ctx; uint32_t x = ctx1[i]; Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); + Lib_IntVector_Intrinsics_vec128 *os = ctx; os[i] = x0;); Lib_IntVector_Intrinsics_vec128 ctr1 = Lib_IntVector_Intrinsics_vec128_load32s(0U, 1U, 2U, 3U); Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U]; diff --git a/src/Hacl_Chacha20_Vec256.c b/src/Hacl_Chacha20_Vec256.c index e61a7cfe..83195c90 100644 --- a/src/Hacl_Chacha20_Vec256.c +++ b/src/Hacl_Chacha20_Vec256.c @@ -153,8 +153,8 @@ chacha20_core_256( 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = k; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); + Lib_IntVector_Intrinsics_vec256 *os = k; os[i] = x;); k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); } @@ -167,37 +167,39 @@ chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, 0U, 4U, 1U, - uint32_t *os = ctx1; uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + uint32_t *os = ctx1; os[i] = x;); + uint32_t *uu____0 = ctx1 + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx1 + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx1[12U] = ctr; + uint32_t *uu____1 = ctx1 + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx1 + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = ctx; uint32_t x = ctx1[i]; Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); + Lib_IntVector_Intrinsics_vec256 *os = ctx; os[i] = x0;); Lib_IntVector_Intrinsics_vec256 ctr1 = Lib_IntVector_Intrinsics_vec256_load32s(0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U); diff --git a/src/Hacl_Chacha20_Vec32.c b/src/Hacl_Chacha20_Vec32.c index 0dce915c..63f1e951 100644 --- a/src/Hacl_Chacha20_Vec32.c +++ b/src/Hacl_Chacha20_Vec32.c @@ -147,8 +147,8 @@ static inline void chacha20_core_32(uint32_t *k, uint32_t *ctx, uint32_t ctr) 0U, 16U, 1U, - uint32_t *os = k; uint32_t x = k[i] + ctx[i]; + uint32_t *os = k; os[i] = x;); k[12U] = k[12U] + cv; } @@ -160,36 +160,38 @@ static inline void chacha20_init_32(uint32_t *ctx, uint8_t *k, uint8_t *n, uint3 0U, 4U, 1U, - uint32_t *os = ctx1; uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + uint32_t *os = ctx1; os[i] = x;); + uint32_t *uu____0 = ctx1 + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx1 + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx1[12U] = ctr; + uint32_t *uu____1 
= ctx1 + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx1 + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = ctx; uint32_t x = ctx1[i]; + uint32_t *os = ctx; os[i] = x;); uint32_t ctr1 = 0U; uint32_t c12 = ctx[12U]; diff --git a/src/Hacl_Curve25519_51.c b/src/Hacl_Curve25519_51.c index ca561e89..2d1b7c76 100644 --- a/src/Hacl_Curve25519_51.c +++ b/src/Hacl_Curve25519_51.c @@ -38,64 +38,87 @@ static void point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_ uint64_t *x1 = q; uint64_t *x2 = nq; uint64_t *z2 = nq + 5U; - uint64_t *z3 = nq_p1 + 5U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 5U; - uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 10U; + uint64_t *ab = tmp1; + uint64_t *a = ab; + uint64_t *b = ab + 5U; Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + uint64_t *ab1 = tmp1; uint64_t *x3 = nq_p1; uint64_t *z31 = nq_p1 + 5U; uint64_t *d0 = dc; uint64_t *c0 = dc + 5U; Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); - Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); - Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); - Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); - uint64_t *a1 = tmp1; - uint64_t *b1 = tmp1 + 5U; - uint64_t *d = tmp1 + 10U; - uint64_t *c = tmp1 + 15U; - uint64_t *ab1 = tmp1; + uint64_t f1_copy0[10U] = { 0U }; + memcpy(f1_copy0, dc, 10U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul2(dc, f1_copy0, ab1, tmp2); + uint64_t *d1 = dc; + uint64_t *c1 = dc + 5U; + Hacl_Impl_Curve25519_Field51_fadd(x3, d1, c1); + Hacl_Impl_Curve25519_Field51_fsub(z31, d1, c1); + uint64_t *ab2 = tmp1; uint64_t *dc1 = tmp1 + 10U; - Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); - Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); + Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab2, tmp2); + uint64_t f1_copy1[10U] = { 0U }; + memcpy(f1_copy1, nq_p1, 10U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, f1_copy1, tmp2); + uint64_t *a1 = ab2; + uint64_t *b1 = ab2 + 5U; + uint64_t *d = dc1; + uint64_t *c = dc1 + 5U; a1[0U] = c[0U]; a1[1U] = c[1U]; a1[2U] = c[2U]; a1[3U] = c[3U]; a1[4U] = c[4U]; - Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, c, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsub(c, d, f2_copy); Hacl_Impl_Curve25519_Field51_fmul1(b1, c, 121665ULL); - Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); - Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); - Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); + uint64_t f1_copy2[5U] = { 0U }; + memcpy(f1_copy2, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fadd(b1, f1_copy2, d); + uint64_t *ab3 = tmp1; + uint64_t *dc2 = tmp1 + 10U; + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc2, ab3, tmp2); + uint64_t *z310 = nq_p1 + 5U; + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, z310, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(z310, f1_copy, x1, tmp2); } static void point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) { uint64_t *x2 = nq; uint64_t *z2 = nq + 5U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 5U; - uint64_t *d = tmp1 + 10U; - uint64_t *c = tmp1 + 15U; uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 10U; + uint64_t *a = ab; + uint64_t *b = ab + 5U; Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); 
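/*
 * The point_double/point_add_and_double hunks above and below replace in-place
 * calls such as fsub(c, d, c) with a stack copy of the aliased operand before
 * the call. A minimal sketch of that pattern, assuming a hypothetical 5-limb
 * helper fadd_demo whose output buffer must not alias its inputs (not the
 * HACL* API, illustration only):
 */
#include <stdint.h>
#include <string.h>

/* hypothetical helper: limb-wise add; its contract says out must not alias a or b */
static void fadd_demo(uint64_t *out, const uint64_t *a, const uint64_t *b)
{
  for (uint32_t i = 0U; i < 5U; i++)
    out[i] = a[i] + b[i];
}

/* double x in place: copy first, so x no longer aliases an input of the call */
static void double_in_place_demo(uint64_t *x)
{
  uint64_t x_copy[5U] = { 0U };
  memcpy(x_copy, x, 5U * sizeof (uint64_t));
  fadd_demo(x, x_copy, x_copy);
}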
Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); - a[0U] = c[0U]; - a[1U] = c[1U]; - a[2U] = c[2U]; - a[3U] = c[3U]; - a[4U] = c[4U]; - Hacl_Impl_Curve25519_Field51_fsub(c, d, c); - Hacl_Impl_Curve25519_Field51_fmul1(b, c, 121665ULL); - Hacl_Impl_Curve25519_Field51_fadd(b, b, d); - Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); + uint64_t *d = dc; + uint64_t *c = dc + 5U; + uint64_t *a1 = ab; + uint64_t *b1 = ab + 5U; + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + a1[4U] = c[4U]; + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, c, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsub(c, d, f2_copy); + Hacl_Impl_Curve25519_Field51_fmul1(b1, c, 121665ULL); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fadd(b1, f1_copy, d); + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + 10U; + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); } static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) @@ -104,7 +127,6 @@ static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) for (uint32_t _i = 0U; _i < 10U; ++_i) tmp2[_i] = FStar_UInt128_uint64_to_uint128(0ULL); uint64_t p01_tmp1_swap[41U] = { 0U }; - uint64_t *p0 = p01_tmp1_swap; uint64_t *p01 = p01_tmp1_swap; uint64_t *p03 = p01; uint64_t *p11 = p01 + 10U; @@ -121,34 +143,39 @@ static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) z0[2U] = 0ULL; z0[3U] = 0ULL; z0[4U] = 0ULL; + uint64_t *swap = p01_tmp1_swap + 40U; uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *nq0 = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + 10U; + Hacl_Impl_Curve25519_Field51_cswap2(1ULL, nq0, nq_p1); uint64_t *p01_tmp11 = p01_tmp1_swap; - uint64_t *nq1 = p01_tmp1_swap; - uint64_t *nq_p11 = p01_tmp1_swap + 10U; - uint64_t *swap = p01_tmp1_swap + 40U; - Hacl_Impl_Curve25519_Field51_cswap2(1ULL, nq1, nq_p11); point_add_and_double(init, p01_tmp11, tmp2); swap[0U] = 1ULL; for (uint32_t i = 0U; i < 251U; i++) { uint64_t *p01_tmp12 = p01_tmp1_swap; uint64_t *swap1 = p01_tmp1_swap + 40U; - uint64_t *nq2 = p01_tmp12; - uint64_t *nq_p12 = p01_tmp12 + 10U; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 10U; uint64_t bit = (uint64_t)((uint32_t)key[(253U - i) / 8U] >> (253U - i) % 8U & 1U); uint64_t sw = swap1[0U] ^ bit; - Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12); + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); point_add_and_double(init, p01_tmp12, tmp2); swap1[0U] = bit; } uint64_t sw = swap[0U]; + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 10U; Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); - uint64_t *nq10 = p01_tmp1; - uint64_t *tmp1 = p01_tmp1 + 20U; - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - memcpy(out, p0, 10U * sizeof (uint64_t)); + uint64_t *p01_tmp10 = p01_tmp1_swap; + uint64_t *nq = p01_tmp10; + uint64_t *tmp1 = p01_tmp10 + 20U; + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + uint64_t *p010 = p01_tmp1_swap; + memcpy(out, p010, 10U * sizeof (uint64_t)); } void @@ -162,7 +189,9 @@ Hacl_Curve25519_51_fsquare_times( Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); for (uint32_t i = 0U; i < n - 1U; i++) { - Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, o, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsqr(o, f1_copy, tmp); } } @@ -176,32 +205,59 @@ void 
Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tm Hacl_Curve25519_51_fsquare_times(a1, i, tmp10, 1U); Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, 2U); Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); - Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); - Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, 1U); - Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); - Hacl_Curve25519_51_fsquare_times(t010, b1, tmp10, 5U); - Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, a1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(a1, b1, f2_copy, tmp); + FStar_UInt128_uint128 *tmp11 = tmp; + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp11, 1U); + uint64_t f2_copy0[5U] = { 0U }; + memcpy(f2_copy0, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, f2_copy0, tmp); + FStar_UInt128_uint128 *tmp12 = tmp; + Hacl_Curve25519_51_fsquare_times(t010, b1, tmp12, 5U); + uint64_t f2_copy1[5U] = { 0U }; + memcpy(f2_copy1, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, f2_copy1, tmp); uint64_t *b10 = t1 + 5U; uint64_t *c10 = t1 + 10U; uint64_t *t011 = t1 + 15U; - FStar_UInt128_uint128 *tmp11 = tmp; - Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, 10U); + FStar_UInt128_uint128 *tmp13 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp13, 10U); Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); - Hacl_Curve25519_51_fsquare_times(t011, c10, tmp11, 20U); - Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); - Hacl_Curve25519_51_fsquare_times(t011, t011, tmp11, 10U); - Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); - Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, 50U); + FStar_UInt128_uint128 *tmp110 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, c10, tmp110, 20U); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, t011, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(t011, f1_copy, c10, tmp); + FStar_UInt128_uint128 *tmp120 = tmp; + uint64_t i_copy0[5U] = { 0U }; + memcpy(i_copy0, t011, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(t011, i_copy0, tmp120, 10U); + uint64_t f2_copy2[5U] = { 0U }; + memcpy(f2_copy2, b10, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(b10, t011, f2_copy2, tmp); + FStar_UInt128_uint128 *tmp130 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp130, 50U); Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); uint64_t *b11 = t1 + 5U; uint64_t *c1 = t1 + 10U; uint64_t *t01 = t1 + 15U; FStar_UInt128_uint128 *tmp1 = tmp; Hacl_Curve25519_51_fsquare_times(t01, c1, tmp1, 100U); - Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); - Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, 50U); - Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); - Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, 5U); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, t01, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(t01, f1_copy0, c1, tmp); + FStar_UInt128_uint128 *tmp111 = tmp; + uint64_t i_copy1[5U] = { 0U }; + memcpy(i_copy1, t01, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(t01, i_copy1, tmp111, 50U); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, t01, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(t01, f1_copy1, b11, tmp); + FStar_UInt128_uint128 *tmp121 = tmp; + uint64_t i_copy[5U] = { 0U }; + memcpy(i_copy, t01, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(t01, i_copy, tmp121, 5U); uint64_t *a = t1; uint64_t 
*t0 = t1 + 15U; Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); @@ -217,7 +273,9 @@ static void encode_point(uint8_t *o, uint64_t *i) for (uint32_t _i = 0U; _i < 10U; ++_i) tmp_w[_i] = FStar_UInt128_uint64_to_uint128(0ULL); Hacl_Curve25519_51_finv(tmp, z, tmp_w); - Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, tmp, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(tmp, f1_copy, x, tmp_w); Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, store64_le(o + i0 * 8U, u64s[i0]);); } @@ -232,16 +290,17 @@ Compute the scalar multiple of a point. void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) { uint64_t init[10U] = { 0U }; + uint64_t init_copy[10U] = { 0U }; uint64_t tmp[4U] = { 0U }; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = tmp; uint8_t *bj = pub + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = tmp; os[i] = x;); uint64_t tmp3 = tmp[3U]; tmp[3U] = tmp3 & 0x7fffffffffffffffULL; @@ -265,7 +324,8 @@ void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) x[2U] = f1h | f2l; x[3U] = f2h | f3l; x[4U] = f3h; - montgomery_ladder(init, priv, init); + memcpy(init_copy, init, 10U * sizeof (uint64_t)); + montgomery_ladder(init, priv, init_copy); encode_point(out, init); } @@ -282,8 +342,8 @@ void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv) uint8_t basepoint[32U] = { 0U }; for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = basepoint; uint8_t x = g25519[i]; + uint8_t *os = basepoint; os[i] = x; } Hacl_Curve25519_51_scalarmult(pub, priv, basepoint); diff --git a/src/Hacl_Curve25519_64.c b/src/Hacl_Curve25519_64.c index edcab306..0a0dd778 100644 --- a/src/Hacl_Curve25519_64.c +++ b/src/Hacl_Curve25519_64.c @@ -121,69 +121,91 @@ static void point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2 uint64_t *x1 = q; uint64_t *x2 = nq; uint64_t *z2 = nq + 4U; - uint64_t *z3 = nq_p1 + 4U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 4U; - uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 8U; + uint64_t *ab = tmp1; + uint64_t *a = ab; + uint64_t *b = ab + 4U; fadd0(a, x2, z2); fsub0(b, x2, z2); + uint64_t *ab1 = tmp1; uint64_t *x3 = nq_p1; uint64_t *z31 = nq_p1 + 4U; uint64_t *d0 = dc; uint64_t *c0 = dc + 4U; fadd0(c0, x3, z31); fsub0(d0, x3, z31); - fmul20(dc, dc, ab, tmp2); - fadd0(x3, d0, c0); - fsub0(z31, d0, c0); - uint64_t *a1 = tmp1; - uint64_t *b1 = tmp1 + 4U; - uint64_t *d = tmp1 + 8U; - uint64_t *c = tmp1 + 12U; - uint64_t *ab1 = tmp1; + uint64_t f1_copy0[8U] = { 0U }; + memcpy(f1_copy0, dc, 8U * sizeof (uint64_t)); + fmul20(dc, f1_copy0, ab1, tmp2); + uint64_t *d1 = dc; + uint64_t *c1 = dc + 4U; + fadd0(x3, d1, c1); + fsub0(z31, d1, c1); + uint64_t *ab2 = tmp1; uint64_t *dc1 = tmp1 + 8U; - fsqr20(dc1, ab1, tmp2); - fsqr20(nq_p1, nq_p1, tmp2); + fsqr20(dc1, ab2, tmp2); + uint64_t f1_copy1[8U] = { 0U }; + memcpy(f1_copy1, nq_p1, 8U * sizeof (uint64_t)); + fsqr20(nq_p1, f1_copy1, tmp2); + uint64_t *a1 = ab2; + uint64_t *b1 = ab2 + 4U; + uint64_t *d = dc1; + uint64_t *c = dc1 + 4U; a1[0U] = c[0U]; a1[1U] = c[1U]; a1[2U] = c[2U]; a1[3U] = c[3U]; - fsub0(c, d, c); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, c, 4U * sizeof (uint64_t)); + fsub0(c, d, f2_copy); fmul_scalar0(b1, c, 121665ULL); - fadd0(b1, b1, d); - fmul20(nq, dc1, ab1, tmp2); - fmul0(z3, z3, x1, tmp2); + uint64_t f1_copy2[4U] = { 0U }; + memcpy(f1_copy2, b1, 4U * sizeof (uint64_t)); + fadd0(b1, f1_copy2, d); + 
uint64_t *ab3 = tmp1; + uint64_t *dc2 = tmp1 + 8U; + fmul20(nq, dc2, ab3, tmp2); + uint64_t *z310 = nq_p1 + 4U; + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, z310, 4U * sizeof (uint64_t)); + fmul0(z310, f1_copy, x1, tmp2); } static void point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2) { uint64_t *x2 = nq; uint64_t *z2 = nq + 4U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 4U; - uint64_t *d = tmp1 + 8U; - uint64_t *c = tmp1 + 12U; uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 8U; + uint64_t *a = ab; + uint64_t *b = ab + 4U; fadd0(a, x2, z2); fsub0(b, x2, z2); fsqr20(dc, ab, tmp2); - a[0U] = c[0U]; - a[1U] = c[1U]; - a[2U] = c[2U]; - a[3U] = c[3U]; - fsub0(c, d, c); - fmul_scalar0(b, c, 121665ULL); - fadd0(b, b, d); - fmul20(nq, dc, ab, tmp2); + uint64_t *d = dc; + uint64_t *c = dc + 4U; + uint64_t *a1 = ab; + uint64_t *b1 = ab + 4U; + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, c, 4U * sizeof (uint64_t)); + fsub0(c, d, f2_copy); + fmul_scalar0(b1, c, 121665ULL); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, b1, 4U * sizeof (uint64_t)); + fadd0(b1, f1_copy, d); + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + 8U; + fmul20(nq, dc1, ab1, tmp2); } static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) { uint64_t tmp2[16U] = { 0U }; uint64_t p01_tmp1_swap[33U] = { 0U }; - uint64_t *p0 = p01_tmp1_swap; uint64_t *p01 = p01_tmp1_swap; uint64_t *p03 = p01; uint64_t *p11 = p01 + 8U; @@ -198,34 +220,39 @@ static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) z0[1U] = 0ULL; z0[2U] = 0ULL; z0[3U] = 0ULL; + uint64_t *swap = p01_tmp1_swap + 32U; uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *nq0 = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + 8U; + cswap20(1ULL, nq0, nq_p1); uint64_t *p01_tmp11 = p01_tmp1_swap; - uint64_t *nq1 = p01_tmp1_swap; - uint64_t *nq_p11 = p01_tmp1_swap + 8U; - uint64_t *swap = p01_tmp1_swap + 32U; - cswap20(1ULL, nq1, nq_p11); point_add_and_double(init, p01_tmp11, tmp2); swap[0U] = 1ULL; for (uint32_t i = 0U; i < 251U; i++) { uint64_t *p01_tmp12 = p01_tmp1_swap; uint64_t *swap1 = p01_tmp1_swap + 32U; - uint64_t *nq2 = p01_tmp12; - uint64_t *nq_p12 = p01_tmp12 + 8U; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 8U; uint64_t bit = (uint64_t)((uint32_t)key[(253U - i) / 8U] >> (253U - i) % 8U & 1U); uint64_t sw = swap1[0U] ^ bit; - cswap20(sw, nq2, nq_p12); + cswap20(sw, nq1, nq_p11); point_add_and_double(init, p01_tmp12, tmp2); swap1[0U] = bit; } uint64_t sw = swap[0U]; + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 8U; cswap20(sw, nq1, nq_p11); - uint64_t *nq10 = p01_tmp1; - uint64_t *tmp1 = p01_tmp1 + 16U; - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - memcpy(out, p0, 8U * sizeof (uint64_t)); + uint64_t *p01_tmp10 = p01_tmp1_swap; + uint64_t *nq = p01_tmp10; + uint64_t *tmp1 = p01_tmp10 + 16U; + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + uint64_t *p010 = p01_tmp1_swap; + memcpy(out, p010, 8U * sizeof (uint64_t)); } static void fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) @@ -233,7 +260,9 @@ static void fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) fsqr0(o, inp, tmp); for (uint32_t i = 0U; i < n - 1U; i++) { - fsqr0(o, o, tmp); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, o, 4U * sizeof (uint64_t)); + fsqr0(o, f1_copy, 
tmp); } } @@ -247,32 +276,59 @@ static void finv(uint64_t *o, uint64_t *i, uint64_t *tmp) fsquare_times(a1, i, tmp10, 1U); fsquare_times(t010, a1, tmp10, 2U); fmul0(b1, t010, i, tmp); - fmul0(a1, b1, a1, tmp); - fsquare_times(t010, a1, tmp10, 1U); - fmul0(b1, t010, b1, tmp); - fsquare_times(t010, b1, tmp10, 5U); - fmul0(b1, t010, b1, tmp); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, a1, 4U * sizeof (uint64_t)); + fmul0(a1, b1, f2_copy, tmp); + uint64_t *tmp11 = tmp; + fsquare_times(t010, a1, tmp11, 1U); + uint64_t f2_copy0[4U] = { 0U }; + memcpy(f2_copy0, b1, 4U * sizeof (uint64_t)); + fmul0(b1, t010, f2_copy0, tmp); + uint64_t *tmp12 = tmp; + fsquare_times(t010, b1, tmp12, 5U); + uint64_t f2_copy1[4U] = { 0U }; + memcpy(f2_copy1, b1, 4U * sizeof (uint64_t)); + fmul0(b1, t010, f2_copy1, tmp); uint64_t *b10 = t1 + 4U; uint64_t *c10 = t1 + 8U; uint64_t *t011 = t1 + 12U; - uint64_t *tmp11 = tmp; - fsquare_times(t011, b10, tmp11, 10U); + uint64_t *tmp13 = tmp; + fsquare_times(t011, b10, tmp13, 10U); fmul0(c10, t011, b10, tmp); - fsquare_times(t011, c10, tmp11, 20U); - fmul0(t011, t011, c10, tmp); - fsquare_times(t011, t011, tmp11, 10U); - fmul0(b10, t011, b10, tmp); - fsquare_times(t011, b10, tmp11, 50U); + uint64_t *tmp110 = tmp; + fsquare_times(t011, c10, tmp110, 20U); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, t011, 4U * sizeof (uint64_t)); + fmul0(t011, f1_copy, c10, tmp); + uint64_t *tmp120 = tmp; + uint64_t i_copy0[4U] = { 0U }; + memcpy(i_copy0, t011, 4U * sizeof (uint64_t)); + fsquare_times(t011, i_copy0, tmp120, 10U); + uint64_t f2_copy2[4U] = { 0U }; + memcpy(f2_copy2, b10, 4U * sizeof (uint64_t)); + fmul0(b10, t011, f2_copy2, tmp); + uint64_t *tmp130 = tmp; + fsquare_times(t011, b10, tmp130, 50U); fmul0(c10, t011, b10, tmp); uint64_t *b11 = t1 + 4U; uint64_t *c1 = t1 + 8U; uint64_t *t01 = t1 + 12U; uint64_t *tmp1 = tmp; fsquare_times(t01, c1, tmp1, 100U); - fmul0(t01, t01, c1, tmp); - fsquare_times(t01, t01, tmp1, 50U); - fmul0(t01, t01, b11, tmp); - fsquare_times(t01, t01, tmp1, 5U); + uint64_t f1_copy0[4U] = { 0U }; + memcpy(f1_copy0, t01, 4U * sizeof (uint64_t)); + fmul0(t01, f1_copy0, c1, tmp); + uint64_t *tmp111 = tmp; + uint64_t i_copy1[4U] = { 0U }; + memcpy(i_copy1, t01, 4U * sizeof (uint64_t)); + fsquare_times(t01, i_copy1, tmp111, 50U); + uint64_t f1_copy1[4U] = { 0U }; + memcpy(f1_copy1, t01, 4U * sizeof (uint64_t)); + fmul0(t01, f1_copy1, b11, tmp); + uint64_t *tmp121 = tmp; + uint64_t i_copy[4U] = { 0U }; + memcpy(i_copy, t01, 4U * sizeof (uint64_t)); + fsquare_times(t01, i_copy, tmp121, 5U); uint64_t *a = t1; uint64_t *t0 = t1 + 12U; fmul0(o, t0, a, tmp); @@ -319,7 +375,9 @@ static void encode_point(uint8_t *o, uint64_t *i) uint64_t u64s[4U] = { 0U }; uint64_t tmp_w[16U] = { 0U }; finv(tmp, z, tmp_w); - fmul0(tmp, tmp, x, tmp_w); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, tmp, 4U * sizeof (uint64_t)); + fmul0(tmp, f1_copy, x, tmp_w); store_felem(u64s, tmp); KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, store64_le(o + i0 * 8U, u64s[i0]);); } @@ -334,16 +392,17 @@ Compute the scalar multiple of a point. 
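/*
 * The montgomery_ladder loops above consume one scalar bit per iteration and
 * drive a branch-free conditional swap with swap ^ bit. A small sketch of the
 * bit extraction and an XOR-mask swap on single words (hypothetical demo
 * names; the real cswap2/cswap20 swap entire field elements):
 */
#include <stdint.h>

/* bit i (little-endian bit order) of a 32-byte scalar, as in the ladder loop */
static uint64_t scalar_bit_demo(const uint8_t *key, uint32_t i)
{
  return (uint64_t)((uint32_t)key[i / 8U] >> (i % 8U) & 1U);
}

/* swap *a and *b when bit == 1, leave them untouched when bit == 0, no branch */
static void cswap_demo(uint64_t bit, uint64_t *a, uint64_t *b)
{
  uint64_t mask = 0U - bit;
  uint64_t t = mask & (*a ^ *b);
  *a = *a ^ t;
  *b = *b ^ t;
}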
void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) { uint64_t init[8U] = { 0U }; + uint64_t init_copy[8U] = { 0U }; uint64_t tmp[4U] = { 0U }; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = tmp; uint8_t *bj = pub + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = tmp; os[i] = x;); uint64_t tmp3 = tmp[3U]; tmp[3U] = tmp3 & 0x7fffffffffffffffULL; @@ -357,7 +416,8 @@ void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) x[1U] = tmp[1U]; x[2U] = tmp[2U]; x[3U] = tmp[3U]; - montgomery_ladder(init, priv, init); + memcpy(init_copy, init, 8U * sizeof (uint64_t)); + montgomery_ladder(init, priv, init_copy); encode_point(out, init); } @@ -374,8 +434,8 @@ void Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv) uint8_t basepoint[32U] = { 0U }; for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = basepoint; uint8_t x = g25519[i]; + uint8_t *os = basepoint; os[i] = x; } Hacl_Curve25519_64_scalarmult(pub, priv, basepoint); diff --git a/src/Hacl_EC_K256.c b/src/Hacl_EC_K256.c index 581c223b..d5f6e1a9 100644 --- a/src/Hacl_EC_K256.c +++ b/src/Hacl_EC_K256.c @@ -267,9 +267,9 @@ void Hacl_EC_K256_point_mul(uint8_t *scalar, uint64_t *p, uint64_t *out) 0U, 4U, 1U, - uint64_t *os = scalar_q; uint64_t u = load64_be(scalar + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = scalar_q; os[i] = x;); Hacl_Impl_K256_PointMul_point_mul(out, scalar_q, p); } diff --git a/src/Hacl_Ed25519.c b/src/Hacl_Ed25519.c index d1f8edf2..992ce5c3 100644 --- a/src/Hacl_Ed25519.c +++ b/src/Hacl_Ed25519.c @@ -144,7 +144,9 @@ static inline void fsquare_times_inplace(uint64_t *output, uint32_t count) FStar_UInt128_uint128 tmp[5U]; for (uint32_t _i = 0U; _i < 5U; ++_i) tmp[_i] = FStar_UInt128_uint64_to_uint128(0ULL); - Hacl_Curve25519_51_fsquare_times(output, output, tmp, count); + uint64_t input[5U] = { 0U }; + memcpy(input, output, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(output, input, tmp, count); } void Hacl_Bignum25519_inverse(uint64_t *out, uint64_t *a) @@ -215,11 +217,11 @@ void Hacl_Bignum25519_load_51(uint64_t *output, uint8_t *input) 0U, 4U, 1U, - uint64_t *os = u64s; uint8_t *bj = input + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = u64s; os[i] = x;); uint64_t u64s3 = u64s[3U]; u64s[3U] = u64s3 & 0x7fffffffffffffffULL; @@ -252,7 +254,9 @@ void Hacl_Impl_Ed25519_PointDouble_point_double(uint64_t *out, uint64_t *p) fsum(tmp30, tmp1, tmp20); fdifference(tmp40, tmp1, tmp20); fsquare(tmp1, z1); - times_2(tmp1, tmp1); + uint64_t a_copy[5U] = { 0U }; + memcpy(a_copy, tmp1, 5U * sizeof (uint64_t)); + times_2(tmp1, a_copy); uint64_t *tmp10 = tmp; uint64_t *tmp2 = tmp + 5U; uint64_t *tmp3 = tmp + 10U; @@ -260,12 +264,18 @@ void Hacl_Impl_Ed25519_PointDouble_point_double(uint64_t *out, uint64_t *p) uint64_t *x1 = p; uint64_t *y1 = p + 5U; fsum(tmp2, x1, y1); - fsquare(tmp2, tmp2); + uint64_t a_copy0[5U] = { 0U }; + memcpy(a_copy0, tmp2, 5U * sizeof (uint64_t)); + fsquare(tmp2, a_copy0); Hacl_Bignum25519_reduce_513(tmp3); - fdifference(tmp2, tmp3, tmp2); + uint64_t b_copy[5U] = { 0U }; + memcpy(b_copy, tmp2, 5U * sizeof (uint64_t)); + fdifference(tmp2, tmp3, b_copy); Hacl_Bignum25519_reduce_513(tmp10); Hacl_Bignum25519_reduce_513(tmp4); - fsum(tmp10, tmp10, tmp4); + uint64_t a_copy1[5U] = { 0U }; + memcpy(a_copy1, tmp10, 5U * sizeof (uint64_t)); + fsum(tmp10, a_copy1, tmp4); uint64_t *tmp_f = tmp; uint64_t *tmp_e = tmp + 5U; uint64_t *tmp_h = tmp + 10U; @@ -308,12 +318,18 @@ 
void Hacl_Impl_Ed25519_PointAdd_point_add(uint64_t *out, uint64_t *p, uint64_t * uint64_t *z2 = q + 10U; uint64_t *t2 = q + 15U; times_2d(tmp10, t1); - fmul0(tmp10, tmp10, t2); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, tmp10, 5U * sizeof (uint64_t)); + fmul0(tmp10, inp_copy, t2); times_2(tmp2, z1); - fmul0(tmp2, tmp2, z2); + uint64_t inp_copy0[5U] = { 0U }; + memcpy(inp_copy0, tmp2, 5U * sizeof (uint64_t)); + fmul0(tmp2, inp_copy0, z2); fdifference(tmp5, tmp4, tmp3); fdifference(tmp6, tmp2, tmp10); - fsum(tmp10, tmp2, tmp10); + uint64_t a_copy[5U] = { 0U }; + memcpy(a_copy, tmp10, 5U * sizeof (uint64_t)); + fsum(tmp10, a_copy, tmp2); fsum(tmp2, tmp4, tmp3); uint64_t *tmp_g = tmp; uint64_t *tmp_h = tmp + 5U; @@ -367,17 +383,27 @@ static inline void pow2_252m2(uint64_t *out, uint64_t *z) fsquare_times(a, z, 1U); fsquare_times(t00, a, 2U); fmul0(b0, t00, z); - fmul0(a, b0, a); + uint64_t inp_copy0[5U] = { 0U }; + memcpy(inp_copy0, a, 5U * sizeof (uint64_t)); + fmul0(a, inp_copy0, b0); fsquare_times(t00, a, 1U); - fmul0(b0, t00, b0); + uint64_t inp_copy1[5U] = { 0U }; + memcpy(inp_copy1, b0, 5U * sizeof (uint64_t)); + fmul0(b0, inp_copy1, t00); fsquare_times(t00, b0, 5U); - fmul0(b0, t00, b0); + uint64_t inp_copy2[5U] = { 0U }; + memcpy(inp_copy2, b0, 5U * sizeof (uint64_t)); + fmul0(b0, inp_copy2, t00); fsquare_times(t00, b0, 10U); fmul0(c0, t00, b0); fsquare_times(t00, c0, 20U); - fmul0(t00, t00, c0); + uint64_t inp_copy3[5U] = { 0U }; + memcpy(inp_copy3, t00, 5U * sizeof (uint64_t)); + fmul0(t00, inp_copy3, c0); fsquare_times_inplace(t00, 10U); - fmul0(b0, t00, b0); + uint64_t inp_copy4[5U] = { 0U }; + memcpy(inp_copy4, b0, 5U * sizeof (uint64_t)); + fmul0(b0, inp_copy4, t00); fsquare_times(t00, b0, 50U); uint64_t *a0 = buf; uint64_t *t0 = buf + 5U; @@ -386,9 +412,13 @@ static inline void pow2_252m2(uint64_t *out, uint64_t *z) fsquare_times(a0, z, 1U); fmul0(c, t0, b); fsquare_times(t0, c, 100U); - fmul0(t0, t0, c); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, t0, 5U * sizeof (uint64_t)); + fmul0(t0, inp_copy, c); fsquare_times_inplace(t0, 50U); - fmul0(t0, t0, b); + uint64_t inp_copy5[5U] = { 0U }; + memcpy(inp_copy5, t0, 5U * sizeof (uint64_t)); + fmul0(t0, inp_copy5, b); fsquare_times_inplace(t0, 2U); fmul0(out, t0, a0); } @@ -411,7 +441,9 @@ static inline void mul_modp_sqrt_m1(uint64_t *x) sqrt_m1[2U] = 0x0007ef5e9cbd0c60ULL; sqrt_m1[3U] = 0x00078595a6804c9eULL; sqrt_m1[4U] = 0x0002b8324804fc1dULL; - fmul0(x, x, sqrt_m1); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, x, 5U * sizeof (uint64_t)); + fmul0(x, inp_copy, sqrt_m1); } static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) @@ -450,11 +482,15 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) one[4U] = 0ULL; fsquare(y2, y); times_d(dyy, y2); - fsum(dyy, dyy, one); + uint64_t a_copy0[5U] = { 0U }; + memcpy(a_copy0, dyy, 5U * sizeof (uint64_t)); + fsum(dyy, a_copy0, one); Hacl_Bignum25519_reduce_513(dyy); Hacl_Bignum25519_inverse(dyyi, dyy); fdifference(x2, y2, one); - fmul0(x2, x2, dyyi); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, x2, 5U * sizeof (uint64_t)); + fmul0(x2, inp_copy, dyyi); reduce(x2); bool x2_is_0 = is_0(x2); uint8_t z; @@ -493,7 +529,9 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) uint64_t *t00 = tmp + 10U; pow2_252m2(x31, x210); fsquare(t00, x31); - fdifference(t00, t00, x210); + uint64_t a_copy1[5U] = { 0U }; + memcpy(a_copy1, t00, 5U * sizeof (uint64_t)); + fdifference(t00, a_copy1, x210); 
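/*
 * pow2_252m2 and the finv routines above assemble a fixed exponentiation from
 * fsquare_times (n successive squarings, i.e. x^(2^n)) and field multiplies.
 * A toy version of the repeated-squaring step on machine integers modulo a
 * small prime, for orientation only (not the 51-limb field arithmetic):
 */
#include <stdint.h>

/* toy modular multiply; assumes p < 2^32 so the 64-bit product cannot overflow */
static uint64_t mulmod_demo(uint64_t a, uint64_t b, uint64_t p)
{
  return (a * b) % p;
}

/* square n times: returns x^(2^n) mod p, mirroring fsquare_times(out, x, tmp, n) */
static uint64_t square_times_demo(uint64_t x, uint32_t n, uint64_t p)
{
  uint64_t r = x;
  for (uint32_t i = 0U; i < n; i++)
    r = mulmod_demo(r, r, p);
  return r;
}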
Hacl_Bignum25519_reduce_513(t00); reduce(t00); bool t0_is_0 = is_0(t00); @@ -505,7 +543,9 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) uint64_t *x3 = tmp + 5U; uint64_t *t01 = tmp + 10U; fsquare(t01, x3); - fdifference(t01, t01, x211); + uint64_t a_copy[5U] = { 0U }; + memcpy(a_copy, t01, 5U * sizeof (uint64_t)); + fdifference(t01, a_copy, x211); Hacl_Bignum25519_reduce_513(t01); reduce(t01); bool z1 = is_0(t01); @@ -527,7 +567,9 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) t0[2U] = 0ULL; t0[3U] = 0ULL; t0[4U] = 0ULL; - fdifference(x32, t0, x32); + uint64_t b_copy[5U] = { 0U }; + memcpy(b_copy, x32, 5U * sizeof (uint64_t)); + fdifference(x32, t0, b_copy); Hacl_Bignum25519_reduce_513(x32); reduce(x32); } @@ -578,20 +620,20 @@ bool Hacl_Impl_Ed25519_PointDecompress_point_decompress(uint64_t *out, uint8_t * void Hacl_Impl_Ed25519_PointCompress_point_compress(uint8_t *z, uint64_t *p) { uint64_t tmp[15U] = { 0U }; + uint64_t *zinv = tmp; uint64_t *x = tmp + 5U; - uint64_t *out = tmp + 10U; - uint64_t *zinv1 = tmp; - uint64_t *x1 = tmp + 5U; - uint64_t *out1 = tmp + 10U; + uint64_t *out0 = tmp + 10U; uint64_t *px = p; uint64_t *py = p + 5U; uint64_t *pz = p + 10U; - Hacl_Bignum25519_inverse(zinv1, pz); - fmul0(x1, px, zinv1); - reduce(x1); - fmul0(out1, py, zinv1); - Hacl_Bignum25519_reduce_513(out1); - uint64_t x0 = x[0U]; + Hacl_Bignum25519_inverse(zinv, pz); + fmul0(x, px, zinv); + reduce(x); + fmul0(out0, py, zinv); + Hacl_Bignum25519_reduce_513(out0); + uint64_t *x1 = tmp + 5U; + uint64_t *out = tmp + 10U; + uint64_t x0 = x1[0U]; uint64_t b = x0 & 1ULL; Hacl_Bignum25519_store_51(z, out); uint8_t xbyte = (uint8_t)b; @@ -1246,11 +1288,11 @@ void Hacl_Impl_Ed25519_Ladder_point_mul(uint64_t *out, uint8_t *scalar, uint64_t 0U, 4U, 1U, - uint64_t *os = bscalar; uint8_t *bj = scalar + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar; os[i] = x;); uint64_t table[320U] = { 0U }; uint64_t tmp[20U] = { 0U }; @@ -1258,23 +1300,35 @@ void Hacl_Impl_Ed25519_Ladder_point_mul(uint64_t *out, uint8_t *scalar, uint64_t uint64_t *t1 = table + 20U; Hacl_Impl_Ed25519_PointConstants_make_point_inf(t0); memcpy(t1, q, 20U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 20U; - Hacl_Impl_Ed25519_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, t11, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(tmp, p_copy0); memcpy(table + (2U * i + 2U) * 20U, tmp, 20U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 20U; - Hacl_Impl_Ed25519_PointAdd_point_add(tmp, q, t2); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, q, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(tmp, p_copy, t2); memcpy(table + (2U * i + 3U) * 20U, tmp, 20U * sizeof (uint64_t));); Hacl_Impl_Ed25519_PointConstants_make_point_inf(out); uint64_t tmp0[20U] = { 0U }; for (uint32_t i0 = 0U; i0 < 64U; i0++) { - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, Hacl_Impl_Ed25519_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(out, p_copy);); uint32_t k = 256U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, bscalar, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 20U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -1284,11 
+1338,13 @@ void Hacl_Impl_Ed25519_Ladder_point_mul(uint64_t *out, uint8_t *scalar, uint64_t const uint64_t *res_j = table + (i1 + 1U) * 20U; for (uint32_t i = 0U; i < 20U; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy, tmp0); } } @@ -1303,8 +1359,8 @@ static inline void precomp_get_consttime(const uint64_t *table, uint64_t bits_l, const uint64_t *res_j = table + (i0 + 1U) * 20U; for (uint32_t i = 0U; i < 20U; i++) { - uint64_t *os = tmp; uint64_t x = (c & res_j[i]) | (~c & tmp[i]); + uint64_t *os = tmp; os[i] = x; }); } @@ -1316,11 +1372,11 @@ static inline void point_mul_g(uint64_t *out, uint8_t *scalar) 0U, 4U, 1U, - uint64_t *os = bscalar; uint8_t *bj = scalar + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar; os[i] = x;); uint64_t q1[20U] = { 0U }; uint64_t *gx = q1; @@ -1384,23 +1440,41 @@ static inline void point_mul_g(uint64_t *out, uint8_t *scalar) 0U, 16U, 1U, - KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, Hacl_Impl_Ed25519_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i0, + 0U, + 4U, + 1U, + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(out, p_copy);); uint32_t k = 64U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r4, k, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_g_pow2_192_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy, tmp); uint32_t k0 = 64U - 4U * i - 4U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r3, k0, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_g_pow2_128_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy0, tmp); uint32_t k1 = 64U - 4U * i - 4U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r2, k1, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_g_pow2_64_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp); + uint64_t p_copy1[20U] = { 0U }; + memcpy(p_copy1, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy1, tmp); uint32_t k2 = 64U - 4U * i - 4U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r1, k2, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp);); + uint64_t p_copy2[20U] = { 0U }; + memcpy(p_copy2, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy2, tmp);); KRML_MAYBE_UNUSED_VAR(q2); KRML_MAYBE_UNUSED_VAR(q3); KRML_MAYBE_UNUSED_VAR(q4); @@ -1441,21 +1515,21 @@ point_mul_g_double_vartime(uint64_t *out, uint8_t *scalar1, uint8_t *scalar2, ui 0U, 4U, 1U, - uint64_t *os = bscalar1; uint8_t *bj = 
scalar1 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar1; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = bscalar2; uint8_t *bj = scalar2 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar2; os[i] = x;); uint64_t table2[640U] = { 0U }; uint64_t tmp1[20U] = { 0U }; @@ -1463,15 +1537,20 @@ point_mul_g_double_vartime(uint64_t *out, uint8_t *scalar1, uint8_t *scalar2, ui uint64_t *t1 = table2 + 20U; Hacl_Impl_Ed25519_PointConstants_make_point_inf(t0); memcpy(t1, q2, 20U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 20U; - Hacl_Impl_Ed25519_PointDouble_point_double(tmp1, t11); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, t11, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(tmp1, p_copy0); memcpy(table2 + (2U * i + 2U) * 20U, tmp1, 20U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 20U; - Hacl_Impl_Ed25519_PointAdd_point_add(tmp1, q2, t2); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, q2, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(tmp1, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 20U, tmp1, 20U * sizeof (uint64_t));); uint64_t tmp10[20U] = { 0U }; uint32_t i0 = 255U; @@ -1486,25 +1565,39 @@ point_mul_g_double_vartime(uint64_t *out, uint8_t *scalar1, uint8_t *scalar2, ui uint32_t bits_l320 = (uint32_t)bits_c0; const uint64_t *a_bits_l0 = table2 + bits_l320 * 20U; memcpy(tmp10, (uint64_t *)a_bits_l0, 20U * sizeof (uint64_t)); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp10); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy, tmp10); uint64_t tmp11[20U] = { 0U }; for (uint32_t i = 0U; i < 51U; i++) { - KRML_MAYBE_FOR5(i2, 0U, 5U, 1U, Hacl_Impl_Ed25519_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(out, p_copy0);); uint32_t k = 255U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, bscalar2, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l321 = (uint32_t)bits_l; const uint64_t *a_bits_l1 = table2 + bits_l321 * 20U; memcpy(tmp11, (uint64_t *)a_bits_l1, 20U * sizeof (uint64_t)); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp11); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy0, tmp11); uint32_t k0 = 255U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, bscalar1, k0, 5U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l322 = (uint32_t)bits_l0; const uint64_t *a_bits_l2 = Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * 20U; memcpy(tmp11, (uint64_t *)a_bits_l2, 20U * sizeof (uint64_t)); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp11); + uint64_t p_copy1[20U] = { 0U }; + memcpy(p_copy1, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy1, tmp11); } } @@ -1624,10 +1717,10 @@ static inline void sha512_pre_msg(uint8_t *hash, uint8_t *prefix, uint32_t len, { uint8_t buf[128U] = { 0U }; uint64_t block_state[8U] = { 0U }; + Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; 
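/*
 * The point_mul and precomp_get_consttime hunks above scan every row of the
 * precomputed table and keep the wanted one with a mask, so the memory access
 * pattern does not depend on the secret window value. A sketch of that masked
 * select on a 16-row table of 4-limb entries (hypothetical sizes; the real
 * tables hold whole curve points):
 */
#include <stdint.h>

/* all-ones when a == b, zero otherwise, without branching on the inputs */
static uint64_t eq_mask_demo(uint64_t a, uint64_t b)
{
  uint64_t x = a ^ b;
  uint64_t minus_x = ~x + 1U;
  uint64_t xnx = (x | minus_x) >> 63U;
  return xnx - 1U;
}

/* out := table[idx], touching every row exactly once regardless of idx */
static void table_select_demo(const uint64_t table[16U][4U], uint64_t idx, uint64_t out[4U])
{
  for (uint32_t j = 0U; j < 4U; j++)
    out[j] = table[0U][j];
  for (uint64_t i = 1U; i < 16U; i++)
  {
    uint64_t c = eq_mask_demo(i, idx);
    for (uint32_t j = 0U; j < 4U; j++)
      out[j] = (c & table[i][j]) | (~c & out[j]);
  }
}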
Hacl_Streaming_MD_state_64 p = s; - Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 *st = &p; Hacl_Streaming_Types_error_code err0 = Hacl_Hash_SHA2_update_512(st, prefix, 32U); Hacl_Streaming_Types_error_code err1 = Hacl_Hash_SHA2_update_512(st, input, len); @@ -1647,10 +1740,10 @@ sha512_pre_pre2_msg( { uint8_t buf[128U] = { 0U }; uint64_t block_state[8U] = { 0U }; + Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 p = s; - Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 *st = &p; Hacl_Streaming_Types_error_code err0 = Hacl_Hash_SHA2_update_512(st, prefix, 32U); Hacl_Streaming_Types_error_code err1 = Hacl_Hash_SHA2_update_512(st, prefix2, 32U); @@ -1734,10 +1827,10 @@ Compute the expanded keys for an Ed25519 signature. */ void Hacl_Ed25519_expand_keys(uint8_t *expanded_keys, uint8_t *private_key) { - uint8_t *public_key = expanded_keys; uint8_t *s_prefix = expanded_keys + 32U; - uint8_t *s = expanded_keys + 32U; secret_expand(s_prefix, private_key); + uint8_t *public_key = expanded_keys; + uint8_t *s = expanded_keys + 32U; point_mul_g_compress(public_key, s); } @@ -1774,8 +1867,12 @@ Hacl_Ed25519_sign_expanded( sha512_modq_pre_pre2(hq, rs, public_key, msg_len, msg); uint64_t aq[5U] = { 0U }; load_32_bytes(aq, s); - mul_modq(aq, hq, aq); - add_modq(aq, rq, aq); + uint64_t y_copy[5U] = { 0U }; + memcpy(y_copy, aq, 5U * sizeof (uint64_t)); + mul_modq(aq, hq, y_copy); + uint64_t y_copy0[5U] = { 0U }; + memcpy(y_copy0, aq, 5U * sizeof (uint64_t)); + add_modq(aq, rq, y_copy0); store_56(ss, aq); } diff --git a/src/Hacl_FFDHE.c b/src/Hacl_FFDHE.c index 098aa607..8e5c94f1 100644 --- a/src/Hacl_FFDHE.c +++ b/src/Hacl_FFDHE.c @@ -140,8 +140,8 @@ static inline void ffdhe_precomp_p(Spec_FFDHE_ffdhe_alg a, uint64_t *p_r2_n) uint32_t len = ffdhe_len(a); for (uint32_t i = 0U; i < len; i++) { - uint8_t *os = p_s; uint8_t x = p[i]; + uint8_t *os = p_s; os[i] = x; } Hacl_Bignum_Convert_bn_from_bytes_be_uint64(ffdhe_len(a), p_s, p_n); @@ -279,8 +279,8 @@ Hacl_FFDHE_ffdhe_secret_to_public_precomp( memset(g_n, 0U, nLen * sizeof (uint64_t)); uint8_t g = 0U; { - uint8_t *os = &g; uint8_t x = Hacl_Impl_FFDHE_Constants_ffdhe_g2[0U]; + uint8_t *os = &g; os[0U] = x; } Hacl_Bignum_Convert_bn_from_bytes_be_uint64(1U, &g, g_n); diff --git a/src/Hacl_Frodo1344.c b/src/Hacl_Frodo1344.c index 9fe78471..33f87629 100644 --- a/src/Hacl_Frodo1344.c +++ b/src/Hacl_Frodo1344.c @@ -210,10 +210,10 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t kp_s[32U] = { 0U }; for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x; } uint32_t ss_init_len = 21664U; diff --git a/src/Hacl_Frodo64.c b/src/Hacl_Frodo64.c index 19f1562d..f88c5d63 100644 --- a/src/Hacl_Frodo64.c +++ b/src/Hacl_Frodo64.c @@ -214,10 +214,10 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) 0U, 16U, 1U, - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x;); uint32_t ss_init_len = 1096U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); diff --git a/src/Hacl_Frodo640.c b/src/Hacl_Frodo640.c index 8cf0253e..95feeb20 100644 --- a/src/Hacl_Frodo640.c +++ 
b/src/Hacl_Frodo640.c @@ -212,10 +212,10 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) 0U, 16U, 1U, - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x;); uint32_t ss_init_len = 9736U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); diff --git a/src/Hacl_Frodo976.c b/src/Hacl_Frodo976.c index 9360e3af..879fb5b2 100644 --- a/src/Hacl_Frodo976.c +++ b/src/Hacl_Frodo976.c @@ -210,10 +210,10 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t kp_s[24U] = { 0U }; for (uint32_t i = 0U; i < 24U; i++) { - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x; } uint32_t ss_init_len = 15768U; diff --git a/src/Hacl_GenericField32.c b/src/Hacl_GenericField32.c index f509e6d4..3e7597bd 100644 --- a/src/Hacl_GenericField32.c +++ b/src/Hacl_GenericField32.c @@ -102,9 +102,9 @@ Deallocate the memory previously allocated by Hacl_GenericField32_field_init. */ void Hacl_GenericField32_field_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -117,8 +117,7 @@ Return the size of a modulus `n` in limbs. */ uint32_t Hacl_GenericField32_field_get_len(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - return k1.len; + return (*k).len; } /** @@ -137,8 +136,8 @@ Hacl_GenericField32_to_field( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_to_mont_u32(len1, k1.n, k1.mu, k1.r2, a, aM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_to_mont_u32(len1, uu____0.n, uu____0.mu, uu____0.r2, a, aM); } /** @@ -158,8 +157,8 @@ Hacl_GenericField32_from_field( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, k1.n, k1.mu, aM, a); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, uu____0.n, uu____0.mu, aM, a); } /** @@ -177,8 +176,16 @@ Hacl_GenericField32_add( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_bn_add_mod_n_u32(len1, k1.n, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint32_t), len1); + uint32_t a_copy[len1]; + memset(a_copy, 0U, len1 * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len1); + uint32_t b_copy[len1]; + memset(b_copy, 0U, len1 * sizeof (uint32_t)); + memcpy(a_copy, aM, len1 * sizeof (uint32_t)); + memcpy(b_copy, bM, len1 * sizeof (uint32_t)); + Hacl_Bignum_bn_add_mod_n_u32(len1, uu____0.n, a_copy, b_copy, cM); } /** @@ -196,8 +203,7 @@ Hacl_GenericField32_sub( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_bn_sub_mod_n_u32(len1, k1.n, aM, bM, cM); + Hacl_Bignum_bn_sub_mod_n_u32(len1, (*k).n, aM, bM, cM); } /** @@ -215,8 +221,8 @@ Hacl_GenericField32_mul( ) 
{ uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, k1.n, k1.mu, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, uu____0.n, uu____0.mu, aM, bM, cM); } /** @@ -233,8 +239,8 @@ Hacl_GenericField32_sqr( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, k1.n, k1.mu, aM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, uu____0.n, uu____0.mu, aM, cM); } /** @@ -246,8 +252,8 @@ Convert a bignum `one` to its Montgomery representation. void Hacl_GenericField32_one(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k, uint32_t *oneM) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, k1.n, k1.mu, k1.r2, oneM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, uu____0.n, uu____0.mu, uu____0.r2, oneM); } /** @@ -278,22 +284,22 @@ Hacl_GenericField32_exp_consttime( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint32_t), k1.len); - uint32_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint32_t)); - memcpy(aMc, aM, k1.len * sizeof (uint32_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint32_t), uu____0.len); + uint32_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint32_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint32_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); uint32_t sw = 0U; uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -308,9 +314,9 @@ Hacl_GenericField32_exp_consttime( aMc[i] = aMc[i] ^ dummy; } uint32_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, k1.mu, aMc, resM, aMc); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, uu____0.mu, aMc, resM, aMc); uint32_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, k1.mu, resM, resM); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, uu____0.mu, resM, resM); sw = bit; } uint32_t sw0 = sw; @@ -335,8 +341,8 @@ Hacl_GenericField32_exp_consttime( KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); KRML_CHECK_SIZE(sizeof (uint32_t), 16U * len1); uint32_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint32_t)); @@ -347,19 +353,20 @@ Hacl_GenericField32_exp_consttime( uint32_t *t1 = table + len1; 
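/*
 * The exp_consttime/exp_vartime hunks above and below follow a fixed 4-bit
 * window schedule: four squarings, then one multiply by a table entry chosen
 * from the next 4 exponent bits. A toy version of the same schedule on machine
 * integers modulo a small prime (textbook form, vartime table indexing, no
 * Montgomery representation; illustration only):
 */
#include <stdint.h>

/* a^e mod p for p < 2^32, processed 4 exponent bits at a time, left to right */
static uint64_t exp_window4_demo(uint64_t a, uint64_t e, uint64_t p)
{
  uint64_t table[16U];
  table[0U] = 1U % p;
  for (uint32_t i = 1U; i < 16U; i++)
    table[i] = table[i - 1U] * a % p;        /* table[i] = a^i mod p */
  uint64_t acc = 1U % p;
  for (uint32_t i = 0U; i < 16U; i++)        /* 16 windows of 4 bits = 64 bits */
  {
    for (uint32_t j = 0U; j < 4U; j++)
      acc = acc * acc % p;                   /* acc <- acc^16 */
    uint32_t bits = (uint32_t)(e >> (60U - 4U * i)) & 0xFU;
    acc = acc * table[bits] % p;             /* fold in the next window digit */
  }
  return acc;
}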
uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len1; uint32_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, k1.mu, t11, tmp); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len1; uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -374,8 +381,8 @@ Hacl_GenericField32_exp_consttime( const uint32_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x; }); } @@ -383,7 +390,7 @@ Hacl_GenericField32_exp_consttime( { uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t tmp0[len1]; @@ -395,9 +402,10 @@ Hacl_GenericField32_exp_consttime( 4U, 1U, uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)(table + 0U * len1), len1 * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -407,12 +415,12 @@ Hacl_GenericField32_exp_consttime( const uint32_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x; }); uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -445,21 +453,21 @@ Hacl_GenericField32_exp_vartime( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint32_t), k1.len); - uint32_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint32_t)); - memcpy(aMc, aM, k1.len * sizeof (uint32_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint32_t), uu____0.len); + uint32_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint32_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint32_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + 
Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -469,10 +477,10 @@ Hacl_GenericField32_exp_vartime( if (!(bit == 0U)) { uint32_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, k1.mu, resM, aMc, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, uu____0.mu, resM, aMc, resM); } uint32_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n0, k1.mu, aMc, aMc); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n0, uu____0.mu, aMc, aMc); } } else @@ -489,8 +497,8 @@ Hacl_GenericField32_exp_vartime( KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); KRML_CHECK_SIZE(sizeof (uint32_t), 16U * len1); uint32_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint32_t)); @@ -501,19 +509,20 @@ Hacl_GenericField32_exp_vartime( uint32_t *t1 = table + len1; uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len1; uint32_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, k1.mu, t11, tmp); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len1; uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -527,7 +536,7 @@ Hacl_GenericField32_exp_vartime( { uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t tmp0[len1]; @@ -539,14 +548,15 @@ Hacl_GenericField32_exp_vartime( 4U, 1U, uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * len1; memcpy(tmp0, (uint32_t *)a_bits_l, len1 * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -569,16 +579,16 @@ Hacl_GenericField32_inverse( uint32_t *aInvM ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t len1 = k1.len; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t len1 = uu____0.len; KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint32_t)); - uint32_t c0 = 
Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, uu____0.n[0U], 2U, n2); uint32_t c1; if (1U < len1) { - uint32_t *a1 = k1.n + 1U; + uint32_t *a1 = uu____0.n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -610,6 +620,6 @@ Hacl_GenericField32_inverse( c1 = c0; } KRML_MAYBE_UNUSED_VAR(c1); - Hacl_GenericField32_exp_vartime(k, aM, k1.len * 32U, n2, aInvM); + Hacl_GenericField32_exp_vartime(k, aM, uu____0.len * 32U, n2, aInvM); } diff --git a/src/Hacl_GenericField64.c b/src/Hacl_GenericField64.c index 3f291d36..3092ac02 100644 --- a/src/Hacl_GenericField64.c +++ b/src/Hacl_GenericField64.c @@ -101,9 +101,9 @@ Deallocate the memory previously allocated by Hacl_GenericField64_field_init. */ void Hacl_GenericField64_field_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -116,8 +116,7 @@ Return the size of a modulus `n` in limbs. */ uint32_t Hacl_GenericField64_field_get_len(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - return k1.len; + return (*k).len; } /** @@ -136,8 +135,8 @@ Hacl_GenericField64_to_field( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_to_mont_u64(len1, k1.n, k1.mu, k1.r2, a, aM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_to_mont_u64(len1, uu____0.n, uu____0.mu, uu____0.r2, a, aM); } /** @@ -157,8 +156,8 @@ Hacl_GenericField64_from_field( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, k1.n, k1.mu, aM, a); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, uu____0.n, uu____0.mu, aM, a); } /** @@ -176,8 +175,16 @@ Hacl_GenericField64_add( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_bn_add_mod_n_u64(len1, k1.n, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint64_t), len1); + uint64_t a_copy[len1]; + memset(a_copy, 0U, len1 * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len1); + uint64_t b_copy[len1]; + memset(b_copy, 0U, len1 * sizeof (uint64_t)); + memcpy(a_copy, aM, len1 * sizeof (uint64_t)); + memcpy(b_copy, bM, len1 * sizeof (uint64_t)); + Hacl_Bignum_bn_add_mod_n_u64(len1, uu____0.n, a_copy, b_copy, cM); } /** @@ -195,8 +202,7 @@ Hacl_GenericField64_sub( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_bn_sub_mod_n_u64(len1, k1.n, aM, bM, cM); + Hacl_Bignum_bn_sub_mod_n_u64(len1, (*k).n, aM, bM, cM); } /** @@ -214,8 +220,8 @@ Hacl_GenericField64_mul( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, k1.n, k1.mu, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, uu____0.n, uu____0.mu, aM, bM, cM); } /** @@ -232,8 +238,8 @@ 
Hacl_GenericField64_sqr( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, k1.n, k1.mu, aM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, uu____0.n, uu____0.mu, aM, cM); } /** @@ -245,8 +251,8 @@ Convert a bignum `one` to its Montgomery representation. void Hacl_GenericField64_one(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, uint64_t *oneM) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, k1.n, k1.mu, k1.r2, oneM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, uu____0.n, uu____0.mu, uu____0.r2, oneM); } /** @@ -277,22 +283,22 @@ Hacl_GenericField64_exp_consttime( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint64_t), k1.len); - uint64_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint64_t)); - memcpy(aMc, aM, k1.len * sizeof (uint64_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint64_t), uu____0.len); + uint64_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint64_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint64_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); uint64_t sw = 0ULL; uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -307,9 +313,9 @@ Hacl_GenericField64_exp_consttime( aMc[i] = aMc[i] ^ dummy; } uint64_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, k1.mu, aMc, resM, aMc); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, uu____0.mu, aMc, resM, aMc); uint64_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, k1.mu, resM, resM); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, uu____0.mu, resM, resM); sw = bit; } uint64_t sw0 = sw; @@ -334,8 +340,8 @@ Hacl_GenericField64_exp_consttime( KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); KRML_CHECK_SIZE(sizeof (uint64_t), 16U * len1); uint64_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint64_t)); @@ -346,19 +352,20 @@ Hacl_GenericField64_exp_consttime( uint64_t *t1 = table + len1; uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) 
* len1; uint64_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, k1.mu, t11, tmp); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len1; uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -373,8 +380,8 @@ Hacl_GenericField64_exp_consttime( const uint64_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x; }); } @@ -382,7 +389,7 @@ Hacl_GenericField64_exp_consttime( { uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t tmp0[len1]; @@ -394,9 +401,10 @@ Hacl_GenericField64_exp_consttime( 4U, 1U, uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)(table + 0U * len1), len1 * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -406,12 +414,12 @@ Hacl_GenericField64_exp_consttime( const uint64_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -444,21 +452,21 @@ Hacl_GenericField64_exp_vartime( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint64_t), k1.len); - uint64_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint64_t)); - memcpy(aMc, aM, k1.len * sizeof (uint64_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint64_t), uu____0.len); + uint64_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint64_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint64_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -468,10 +476,10 @@ Hacl_GenericField64_exp_vartime( if (!(bit == 0ULL)) { uint64_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, k1.mu, resM, aMc, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, 
uu____0.mu, resM, aMc, resM); } uint64_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n0, k1.mu, aMc, aMc); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n0, uu____0.mu, aMc, aMc); } } else @@ -488,8 +496,8 @@ Hacl_GenericField64_exp_vartime( KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); KRML_CHECK_SIZE(sizeof (uint64_t), 16U * len1); uint64_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint64_t)); @@ -500,19 +508,20 @@ Hacl_GenericField64_exp_vartime( uint64_t *t1 = table + len1; uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len1; uint64_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, k1.mu, t11, tmp); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len1; uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -526,7 +535,7 @@ Hacl_GenericField64_exp_vartime( { uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t tmp0[len1]; @@ -538,14 +547,15 @@ Hacl_GenericField64_exp_vartime( 4U, 1U, uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * len1; memcpy(tmp0, (uint64_t *)a_bits_l, len1 * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -568,16 +578,16 @@ Hacl_GenericField64_inverse( uint64_t *aInvM ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint32_t len1 = k1.len; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint32_t len1 = uu____0.len; KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint64_t)); - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, uu____0.n[0U], 2ULL, n2); uint64_t c1; if (1U < len1) { - uint64_t *a1 = k1.n + 1U; + uint64_t *a1 = uu____0.n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -609,6 +619,6 @@ 
Hacl_GenericField64_inverse( c1 = c0; } KRML_MAYBE_UNUSED_VAR(c1); - Hacl_GenericField64_exp_vartime(k, aM, k1.len * 64U, n2, aInvM); + Hacl_GenericField64_exp_vartime(k, aM, uu____0.len * 64U, n2, aInvM); } diff --git a/src/Hacl_HMAC.c b/src/Hacl_HMAC.c index b03bc7ac..79d584bd 100644 --- a/src/Hacl_HMAC.c +++ b/src/Hacl_HMAC.c @@ -89,7 +89,6 @@ Hacl_HMAC_compute_sha1( opad[i] = (uint32_t)xi ^ (uint32_t)yi; } uint32_t s[5U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U, 0xc3d2e1f0U }; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA1_update_last(s, 0ULL, ipad, 64U); @@ -118,6 +117,7 @@ Hacl_HMAC_compute_sha1( Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA1_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA1_init(s); @@ -207,11 +207,10 @@ Hacl_HMAC_compute_sha2_256( 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha256_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); @@ -243,6 +242,7 @@ Hacl_HMAC_compute_sha2_256( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha256_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha256_init(s); @@ -335,11 +335,10 @@ Hacl_HMAC_compute_sha2_384( 0U, 8U, 1U, - uint64_t *os = st; uint64_t x = Hacl_Hash_SHA2_h384[i]; + uint64_t *os = st; os[i] = x;); uint64_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), @@ -377,6 +376,7 @@ Hacl_HMAC_compute_sha2_384( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha384_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha384_init(s); @@ -471,11 +471,10 @@ Hacl_HMAC_compute_sha2_512( 0U, 8U, 1U, - uint64_t *os = st; uint64_t x = Hacl_Hash_SHA2_h512[i]; + uint64_t *os = st; os[i] = x;); uint64_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), @@ -513,6 +512,7 @@ Hacl_HMAC_compute_sha2_512( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha512_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha512_init(s); @@ -605,11 +605,10 @@ Hacl_HMAC_compute_blake2s_32( uint32_t s[16U] = { 0U }; Hacl_Hash_Blake2s_init(s, 0U, 32U); uint32_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -644,10 +643,12 @@ Hacl_HMAC_compute_blake2s_32( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2s_finish(32U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2s_init(s0, 0U, 32U); @@ -682,6 +683,7 @@ Hacl_HMAC_compute_blake2s_32( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -748,11 +750,16 @@ Hacl_HMAC_compute_blake2b_32( uint64_t s[16U] = { 0U }; Hacl_Hash_Blake2b_init(s, 0U, 64U); uint64_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else 
{ @@ -787,11 +794,13 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2b_finish(64U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2b_init(s0, 0U, 64U); @@ -826,6 +835,7 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/Hacl_HMAC_Blake2b_256.c b/src/Hacl_HMAC_Blake2b_256.c index 6197490a..585313af 100644 --- a/src/Hacl_HMAC_Blake2b_256.c +++ b/src/Hacl_HMAC_Blake2b_256.c @@ -89,13 +89,13 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_init(s, 0U, 64U); Lib_IntVector_Intrinsics_vec256 *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 wv[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_update_last(128U, wv, s0, + false, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); @@ -138,11 +138,13 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2b_Simd256_finish(64U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2b_Simd256_init(s0, 0U, 64U); @@ -182,6 +184,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/Hacl_HMAC_Blake2s_128.c b/src/Hacl_HMAC_Blake2s_128.c index 0741bffb..0a40e243 100644 --- a/src/Hacl_HMAC_Blake2s_128.c +++ b/src/Hacl_HMAC_Blake2s_128.c @@ -88,11 +88,10 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 s[4U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Hash_Blake2s_Simd128_init(s, 0U, 32U); Lib_IntVector_Intrinsics_vec128 *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -127,10 +126,12 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2s_Simd128_finish(32U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2s_Simd128_init(s0, 0U, 32U); @@ -165,6 +166,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); diff --git a/src/Hacl_Hash_Blake2b.c b/src/Hacl_Hash_Blake2b.c index d490a1a5..e13f16fd 100644 --- a/src/Hacl_Hash_Blake2b.c +++ b/src/Hacl_Hash_Blake2b.c @@ -29,7 +29,14 @@ #include "lib_memzero0.h" static void -update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totlen, uint8_t *d) +update_block( + uint64_t *wv, + uint64_t *hash, + bool flag, + bool 
last_node, + FStar_UInt128_uint128 totlen, + uint8_t *d +) { uint64_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, @@ -52,7 +59,15 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask[0U] = FStar_UInt128_uint128_to_uint64(totlen); mask[1U] = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)); mask[2U] = wv_14; @@ -647,11 +662,11 @@ static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -674,7 +689,7 @@ Hacl_Hash_Blake2b_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -683,6 +698,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -693,7 +709,7 @@ Hacl_Hash_Blake2b_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -727,7 +743,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -762,16 +778,19 @@ void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) } static Hacl_Hash_Blake2b_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -790,7 +809,8 @@ static Hacl_Hash_Blake2b_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -800,7 +820,7 @@ static Hacl_Hash_Blake2b_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } @@ -820,14 +840,16 @@ The caller must satisfy the 
following requirements. */ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -844,7 +866,7 @@ The caller must satisfy the following requirements. Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; uint8_t salt[16U] = { 0U }; uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params @@ -855,7 +877,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, k); + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, false, k); return s; } @@ -872,28 +894,30 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) { Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2b_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -903,7 +927,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -921,7 +945,7 @@ reset_raw( /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. 
You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -934,7 +958,7 @@ Hacl_Hash_Blake2b_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -957,7 +981,7 @@ void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** @@ -1040,7 +1064,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = 1U; @@ -1065,7 +1089,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1133,7 +1157,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = 1U; @@ -1159,7 +1183,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1190,16 +1214,20 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
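
For illustration, a minimal calling sequence with the streaming functions defined in
this file; error handling is elided, and the 64-byte buffer matches the default
digest length of the B variant:

  #include "Hacl_Hash_Blake2b.h"

  static void example_blake2b_streaming(void)
  {
    uint8_t msg[3U] = { 1U, 2U, 3U };
    uint8_t out[64U] = { 0U };
    Hacl_Hash_Blake2b_state_t *st = Hacl_Hash_Blake2b_malloc();
    Hacl_Hash_Blake2b_update(st, msg, 3U);                // 0 = success, 1 = max length exceeded
    uint8_t written = Hacl_Hash_Blake2b_digest(st, out);  // equals digest_length, 64 here
    (void)written;
    Hacl_Hash_Blake2b_free(st);
  }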
*/ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_state_t scrut = *state; + Hacl_Hash_Blake2b_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_state_t scrut = *s; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1217,9 +1245,14 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) uint64_t b[16U] = { 0U }; Hacl_Hash_Blake2b_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint64_t *src_b = block_state.thd.snd; - uint64_t *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint64_t *src_b = block_state.f3.snd; + uint64_t *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1233,7 +1266,7 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - K____uint64_t___uint64_t_ acc0 = tmp_block_state.thd; + K____uint64_t___uint64_t_ acc0 = tmp_block_state.f3; uint64_t *wv1 = acc0.fst; uint64_t *hash0 = acc0.snd; uint32_t nb = 0U; @@ -1244,17 +1277,35 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint64_t___uint64_t_ acc = tmp_block_state.thd; + K____uint64_t___uint64_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; Hacl_Hash_Blake2b_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1265,8 +1316,8 @@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint64_t *b = block_state.thd.snd; - uint64_t *wv = block_state.thd.fst; + uint64_t *b = 
block_state.f3.snd; + uint64_t *wv = block_state.f3.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1282,17 +1333,24 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *sta Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint64_t *src_b = block_state0.thd.snd; - uint64_t *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint64_t *src_b = block_state0.f3.snd; + uint64_t *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); Hacl_Hash_Blake2b_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1335,10 +1393,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. */ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/Hacl_Hash_Blake2b_Simd256.c b/src/Hacl_Hash_Blake2b_Simd256.c index 0afd93bc..35608aea 100644 --- a/src/Hacl_Hash_Blake2b_Simd256.c +++ b/src/Hacl_Hash_Blake2b_Simd256.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, bool flag, + bool last_node, FStar_UInt128_uint128 totlen, uint8_t *d ) @@ -59,7 +60,15 @@ update_block( { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask = Lib_IntVector_Intrinsics_vec256_load64s(FStar_UInt128_uint128_to_uint64(totlen), FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)), @@ -372,11 +381,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -399,7 +408,7 @@ Hacl_Hash_Blake2b_Simd256_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -408,6 +417,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -418,7 +428,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen 
= FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -452,7 +462,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_Simd256_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -593,10 +603,7 @@ Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void) } static Hacl_Hash_Blake2b_Simd256_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -610,7 +617,13 @@ static Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -632,7 +645,8 @@ static Hacl_Hash_Blake2b_Simd256_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -642,42 +656,56 @@ static Hacl_Hash_Blake2b_Simd256_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. 
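
As an illustration, a keyed state with a 32-byte digest might be allocated as
follows; the `fanout` and `depth` fields are assumed from the parameter struct in
the public header (1 selects ordinary sequential hashing), and `last_node = false`
keeps the plain, non-tree finalization:

  static Hacl_Hash_Blake2b_Simd256_state_t *example_alloc(void)
  {
    uint8_t key[32U] = { 0U };                 // length must equal params.key_length
    uint8_t salt[16U] = { 0U };
    uint8_t personal[16U] = { 0U };
    Hacl_Hash_Blake2b_blake2_params params =
      {
        .digest_length = 32U, .key_length = 32U, .fanout = 1U, .depth = 1U,
        .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U,
        .salt = salt, .personal = personal
      };
    return Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&params, false, key);
  }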
+ */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -685,21 +713,16 @@ Hacl_Hash_Blake2b_Simd256_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2b_Simd256_state_t - *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) { @@ -709,28 +732,31 @@ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) { Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2b_Simd256_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -740,7 +766,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -757,9 +783,11 @@ reset_raw( } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -769,14 +797,15 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. 
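
For illustration, a keyed state allocated with `malloc_with_key0` can be reused for
several messages by resetting it with the same key; all names below appear in this
file, and the message buffers are placeholders:

  static void example_keyed_reuse(void)
  {
    uint8_t key[32U] = { 0U };
    uint8_t msg1[4U] = { 0U };
    uint8_t msg2[8U] = { 0U };
    uint8_t out[64U] = { 0U };                  // default digest length for B
    Hacl_Hash_Blake2b_Simd256_state_t *st = Hacl_Hash_Blake2b_Simd256_malloc_with_key0(key, 32U);
    Hacl_Hash_Blake2b_Simd256_update(st, msg1, 4U);
    Hacl_Hash_Blake2b_Simd256_digest(st, out);
    Hacl_Hash_Blake2b_Simd256_reset_with_key(st, key);  // key length must stay 32
    Hacl_Hash_Blake2b_Simd256_update(st, msg2, 8U);
    Hacl_Hash_Blake2b_Simd256_digest(st, out);
    Hacl_Hash_Blake2b_Simd256_free(st);
  }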
*/ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) { @@ -791,11 +820,16 @@ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) { @@ -803,7 +837,7 @@ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -873,8 +907,7 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; @@ -899,7 +932,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -967,8 +1000,7 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; @@ -994,7 +1026,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1020,16 +1052,25 @@ Hacl_Hash_Blake2b_Simd256_update( } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. 
Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_Simd256_state_t scrut = *s; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1047,9 +1088,14 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1064,7 +1110,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc0 = tmp_block_state.thd; + acc0 = tmp_block_state.f3; Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; uint32_t nb = 0U; @@ -1076,17 +1122,35 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 nb); uint64_t prev_len_last = total_len - (uint64_t)r; K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = tmp_block_state.thd; + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; Hacl_Hash_Blake2b_Simd256_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = (*s).block_state; + bool 
last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1097,8 +1161,8 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec256 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec256 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1106,7 +1170,7 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) @@ -1115,9 +1179,10 @@ Hacl_Hash_Blake2b_Simd256_state_t Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -1131,9 +1196,15 @@ Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1175,8 +1246,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); } +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/Hacl_Hash_Blake2s.c b/src/Hacl_Hash_Blake2s.c index 6e19d83d..167f38fb 100644 --- a/src/Hacl_Hash_Blake2s.c +++ b/src/Hacl_Hash_Blake2s.c @@ -30,7 +30,14 @@ #include "lib_memzero0.h" static inline void -update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t *d) +update_block( + uint32_t *wv, + uint32_t *hash, + bool flag, + bool last_node, + uint64_t totlen, + uint8_t *d +) { uint32_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, @@ -53,7 +60,15 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask[0U] = (uint32_t)totlen; mask[1U] = (uint32_t)(totlen >> 32U); mask[2U] = wv_14; @@ -642,11 +657,11 @@ static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -666,7 +681,7 @@ Hacl_Hash_Blake2s_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -675,6 +690,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -684,7 +700,7 @@ Hacl_Hash_Blake2s_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -712,7 +728,7 @@ update_blocks(uint32_t len, uint32_t *wv, uint32_t *hash, uint64_t prev, uint8_t rem = rem0; } Hacl_Hash_Blake2s_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -747,16 +763,19 @@ void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash) } static Hacl_Hash_Blake2s_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -775,7 +794,8 @@ static Hacl_Hash_Blake2s_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, 
.digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -785,38 +805,55 @@ static Hacl_Hash_Blake2s_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. 
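+
+For illustration only, a minimal keyed streaming sketch (not part of this
+header; `key`, `msg` and `msg_len` are hypothetical values, and the sketch
+assumes a 32-byte key, i.e. kk = 32):
+
+  uint8_t key[32U];                                   // kk bytes, kk <= 32 for S
+  Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc_with_key(key, 32U);
+  Hacl_Hash_Blake2s_update(st, msg, msg_len);         // may be called repeatedly
+  uint8_t out[32U];                                   // default digest length for S
+  Hacl_Hash_Blake2s_digest(st, out);
+  Hacl_Hash_Blake2s_reset_with_key(st, key);          // same key length as the malloc
+  Hacl_Hash_Blake2s_free(st);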
+ */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -824,20 +861,15 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; - Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) { @@ -847,28 +879,30 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) { Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2s_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -878,7 +912,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -895,9 +929,11 @@ reset_raw( } /** - Re-initialization function. 
The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -907,14 +943,15 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) { @@ -929,11 +966,16 @@ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
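+
+As an illustration (a sketch only; `part1`, `part2` and their lengths are
+hypothetical), a state allocated with `malloc` can be reset and reused to
+hash several independent messages:
+
+  Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc();
+  Hacl_Hash_Blake2s_update(st, part1, part1_len);
+  Hacl_Hash_Blake2s_update(st, part2, part2_len);
+  uint8_t out1[32U];
+  Hacl_Hash_Blake2s_digest(st, out1);
+  Hacl_Hash_Blake2s_reset(st);                        // start over for a new message
+  Hacl_Hash_Blake2s_update(st, part1, part1_len);
+  uint8_t out2[32U];
+  Hacl_Hash_Blake2s_digest(st, out2);
+  Hacl_Hash_Blake2s_free(st);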
*/ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) { @@ -941,7 +983,7 @@ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len) @@ -1007,7 +1049,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = 1U; @@ -1027,7 +1069,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -1090,7 +1132,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = 1U; @@ -1111,7 +1153,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -1132,15 +1174,25 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
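+
+A short sketch of the return-value convention described above (`st` is a
+hypothetical, previously allocated and updated state):
+
+  uint8_t out[32U];                       // 32 bytes covers any Blake2s digest length
+  uint8_t n = Hacl_Hash_Blake2s_digest(st, out);
+  // the digest occupies the first n bytes of out; n equals digest_length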
*/ -void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_state_t scrut = *state; + Hacl_Hash_Blake2s_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_state_t scrut = *s; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1158,9 +1210,14 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) uint32_t b[16U] = { 0U }; Hacl_Hash_Blake2s_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint32_t *src_b = block_state.thd.snd; - uint32_t *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint32_t *src_b = block_state.f3.snd; + uint32_t *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1174,18 +1231,35 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - K____uint32_t___uint32_t_ acc0 = tmp_block_state.thd; + K____uint32_t___uint32_t_ acc0 = tmp_block_state.f3; uint32_t *wv1 = acc0.fst; uint32_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint32_t___uint32_t_ acc = tmp_block_state.thd; + K____uint32_t___uint32_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; - Hacl_Hash_Blake2s_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Hacl_Hash_Blake2s_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1196,8 +1270,8 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint32_t *b = block_state.thd.snd; - uint32_t *wv = block_state.thd.fst; + uint32_t *b = 
block_state.f3.snd; + uint32_t *wv = block_state.f3.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1205,7 +1279,7 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) { @@ -1213,17 +1287,24 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *sta Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint32_t *src_b = block_state0.thd.snd; - uint32_t *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint32_t *src_b = block_state0.f3.snd; + uint32_t *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); Hacl_Hash_Blake2s_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1262,8 +1343,14 @@ Hacl_Hash_Blake2s_hash_with_key( Lib_Memzero0_memzero(b, 16U, uint32_t, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
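+
+A hedged sketch of a call. The argument order after `input_len` is assumed
+from this description and from the streaming `malloc_with_params_and_key`
+variant; it is not shown in this hunk. `params`, `key`, `msg` and `msg_len`
+are hypothetical, already-initialized values.
+
+  uint8_t out[32U];                       // must hold params.digest_length bytes
+  // key must point to params.key_length bytes
+  Hacl_Hash_Blake2s_hash_with_key_and_params(out, msg, msg_len, params, key);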
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/Hacl_Hash_Blake2s_Simd128.c b/src/Hacl_Hash_Blake2s_Simd128.c index c02da8fa..a85b18a4 100644 --- a/src/Hacl_Hash_Blake2s_Simd128.c +++ b/src/Hacl_Hash_Blake2s_Simd128.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, bool flag, + bool last_node, uint64_t totlen, uint8_t *d ) @@ -59,7 +60,15 @@ update_block( { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask = Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)totlen, (uint32_t)(totlen >> 32U), @@ -366,11 +375,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -390,7 +399,7 @@ Hacl_Hash_Blake2s_Simd128_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -399,6 +408,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -408,7 +418,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -442,7 +452,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2s_Simd128_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -583,10 +593,7 @@ Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) } static Hacl_Hash_Blake2s_Simd128_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -600,7 +607,13 @@ static Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -622,7 +635,8 @@ static Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ 
-632,42 +646,56 @@ static Hacl_Hash_Blake2s_Simd128_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. 
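+
+Note that for this 128-bit vectorized variant the keyed allocator is named
+`malloc_with_key0` (see the definition below). A minimal keyed sketch, with a
+hypothetical 32-byte key:
+
+  uint8_t key[32U];
+  Hacl_Hash_Blake2s_Simd128_state_t
+  *st = Hacl_Hash_Blake2s_Simd128_malloc_with_key0(key, 32U);
+  // update and digest calls go here, as in the scalar Blake2s example
+  Hacl_Hash_Blake2s_Simd128_reset_with_key(st, key);  // later resets keep the key length
+  Hacl_Hash_Blake2s_Simd128_free(st);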
+ */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -675,21 +703,16 @@ Hacl_Hash_Blake2s_Simd128_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2s_Simd128_state_t - *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) { @@ -699,28 +722,31 @@ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) { Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2s_Simd128_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -730,7 +756,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -747,9 +773,11 @@ reset_raw( } /** - Re-initialization function. 
The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -759,14 +787,15 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k) { @@ -781,11 +810,16 @@ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
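+
+In other words (illustrative sketch, not additional API; `key` is a
+hypothetical 32-byte array):
+
+  // fine: default key and digest lengths were chosen by malloc
+  Hacl_Hash_Blake2s_Simd128_state_t *st = Hacl_Hash_Blake2s_Simd128_malloc();
+  Hacl_Hash_Blake2s_Simd128_reset(st);
+
+  // not fine: a keyed state must be re-initialized with reset_with_key
+  // instead; calling reset on st2 would violate the precondition above
+  Hacl_Hash_Blake2s_Simd128_state_t
+  *st2 = Hacl_Hash_Blake2s_Simd128_malloc_with_key0(key, 32U);
+  Hacl_Hash_Blake2s_Simd128_reset_with_key(st2, key);
+
+  Hacl_Hash_Blake2s_Simd128_free(st);
+  Hacl_Hash_Blake2s_Simd128_free(st2);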
*/ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) { @@ -793,7 +827,7 @@ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -863,8 +897,7 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; @@ -884,7 +917,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -947,8 +980,7 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; @@ -969,7 +1001,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -990,16 +1022,25 @@ Hacl_Hash_Blake2s_Simd128_update( } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
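+
+A defensive sketch that checks the update error code before extracting the
+digest. The (state, chunk, chunk_len) argument order is assumed to match the
+scalar Blake2s update shown earlier in this patch; `st`, `chunk` and
+`chunk_len` are hypothetical.
+
+  if (Hacl_Hash_Blake2s_Simd128_update(st, chunk, chunk_len)
+      == Hacl_Streaming_Types_MaximumLengthExceeded)
+  {
+    // maximum total input length exceeded (1 = max length exceeded, see above)
+  }
+  uint8_t out[32U];                        // sized for the default 32-byte Blake2s digest
+  uint8_t n = Hacl_Hash_Blake2s_Simd128_digest(st, out);
+  // n = number of digest bytes actually written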
*/ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_Simd128_state_t scrut = *s; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1017,9 +1058,14 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Hash_Blake2s_Simd128_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1034,19 +1080,36 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc0 = tmp_block_state.thd; + acc0 = tmp_block_state.f3; Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = tmp_block_state.thd; + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; - Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + 
uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1057,8 +1120,8 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec128 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec128 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1066,7 +1129,7 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state) @@ -1075,9 +1138,10 @@ Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -1091,9 +1155,15 @@ Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1135,8 +1205,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/Hacl_Hash_MD5.c b/src/Hacl_Hash_MD5.c index ed294839..f2d874eb 100644 --- a/src/Hacl_Hash_MD5.c +++ b/src/Hacl_Hash_MD5.c @@ -1167,24 +1167,20 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_MD5_malloc(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(4U, sizeof (uint32_t)); + Hacl_Hash_MD5_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_MD5_init(block_state); return p; } void Hacl_Hash_MD5_reset(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_MD5_init(block_state); - Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } /** @@ -1193,8 +1189,8 @@ void Hacl_Hash_MD5_reset(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_Types_error_code Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_32 s = *state; - uint64_t total_len = s.total_len; + uint32_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 2305843009213693951ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -1210,10 +1206,8 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t } if (chunk_len <= 64U - sz) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1226,22 +1220,12 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1253,7 +1237,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t } if (!(sz1 == 0U)) { - Hacl_Hash_MD5_update_multi(block_state1, buf, 1U); + Hacl_Hash_MD5_update_multi(block_state, buf, 1U); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1269,28 +1253,18 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); + 
Hacl_Hash_MD5_update_multi(block_state, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -1300,22 +1274,12 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_32 s10 = *state; - uint32_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1327,7 +1291,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t } if (!(sz1 == 0U)) { - Hacl_Hash_MD5_update_multi(block_state1, buf, 1U); + Hacl_Hash_MD5_update_multi(block_state, buf0, 1U); } uint32_t ite; if @@ -1344,28 +1308,19 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); - uint8_t *dst = buf; + Hacl_Hash_MD5_update_multi(block_state, data1, data1_len / 64U); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } void Hacl_Hash_MD5_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -1378,6 +1333,7 @@ void Hacl_Hash_MD5_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) uint8_t *buf_1 = buf_; uint32_t tmp_block_state[4U] = { 0U }; memcpy(tmp_block_state, block_state, 4U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -1388,7 +1344,6 @@ void Hacl_Hash_MD5_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_MD5_update_multi(tmp_block_state, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; 
Hacl_Hash_MD5_update_last(tmp_block_state, prev_len_last, buf_last, r); @@ -1407,10 +1362,9 @@ void Hacl_Hash_MD5_free(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_MD_state_32 *Hacl_Hash_MD5_copy(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint32_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(4U, sizeof (uint32_t)); diff --git a/src/Hacl_Hash_SHA1.c b/src/Hacl_Hash_SHA1.c index 1a8b09b1..44f6bfe5 100644 --- a/src/Hacl_Hash_SHA1.c +++ b/src/Hacl_Hash_SHA1.c @@ -200,24 +200,20 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA1_malloc(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(5U, sizeof (uint32_t)); + Hacl_Hash_SHA1_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_SHA1_init(block_state); return p; } void Hacl_Hash_SHA1_reset(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_SHA1_init(block_state); - Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } /** @@ -226,8 +222,8 @@ void Hacl_Hash_SHA1_reset(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_Types_error_code Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_32 s = *state; - uint64_t total_len = s.total_len; + uint32_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 2305843009213693951ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -243,10 +239,8 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ } if (chunk_len <= 64U - sz) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -259,22 +253,12 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -286,7 +270,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t 
*chunk, uint32_ } if (!(sz1 == 0U)) { - Hacl_Hash_SHA1_update_multi(block_state1, buf, 1U); + Hacl_Hash_SHA1_update_multi(block_state, buf, 1U); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -302,28 +286,18 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); + Hacl_Hash_SHA1_update_multi(block_state, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -333,22 +307,12 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_32 s10 = *state; - uint32_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -360,7 +324,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ } if (!(sz1 == 0U)) { - Hacl_Hash_SHA1_update_multi(block_state1, buf, 1U); + Hacl_Hash_SHA1_update_multi(block_state, buf0, 1U); } uint32_t ite; if @@ -377,28 +341,19 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); - uint8_t *dst = buf; + Hacl_Hash_SHA1_update_multi(block_state, data1, data1_len / 64U); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } void Hacl_Hash_SHA1_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -411,6 +366,7 @@ void Hacl_Hash_SHA1_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) uint8_t 
*buf_1 = buf_; uint32_t tmp_block_state[5U] = { 0U }; memcpy(tmp_block_state, block_state, 5U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -421,7 +377,6 @@ void Hacl_Hash_SHA1_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA1_update_multi(tmp_block_state, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA1_update_last(tmp_block_state, prev_len_last, buf_last, r); @@ -440,10 +395,9 @@ void Hacl_Hash_SHA1_free(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA1_copy(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint32_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(5U, sizeof (uint32_t)); diff --git a/src/Hacl_Hash_SHA2.c b/src/Hacl_Hash_SHA2.c index 995fe707..1d4fbbfd 100644 --- a/src/Hacl_Hash_SHA2.c +++ b/src/Hacl_Hash_SHA2.c @@ -33,8 +33,8 @@ void Hacl_Hash_SHA2_sha256_init(uint32_t *hash) 0U, 8U, 1U, - uint32_t *os = hash; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = hash; os[i] = x;); } @@ -140,8 +140,8 @@ static inline void sha256_update(uint8_t *b, uint32_t *hash) 0U, 8U, 1U, - uint32_t *os = hash; uint32_t x = hash[i] + hash_old[i]; + uint32_t *os = hash; os[i] = x;); } @@ -206,8 +206,8 @@ void Hacl_Hash_SHA2_sha224_init(uint32_t *hash) 0U, 8U, 1U, - uint32_t *os = hash; uint32_t x = Hacl_Hash_SHA2_h224[i]; + uint32_t *os = hash; os[i] = x;); } @@ -234,8 +234,8 @@ void Hacl_Hash_SHA2_sha512_init(uint64_t *hash) 0U, 8U, 1U, - uint64_t *os = hash; uint64_t x = Hacl_Hash_SHA2_h512[i]; + uint64_t *os = hash; os[i] = x;); } @@ -341,8 +341,8 @@ static inline void sha512_update(uint8_t *b, uint64_t *hash) 0U, 8U, 1U, - uint64_t *os = hash; uint64_t x = hash[i] + hash_old[i]; + uint64_t *os = hash; os[i] = x;); } @@ -412,8 +412,8 @@ void Hacl_Hash_SHA2_sha384_init(uint64_t *hash) 0U, 8U, 1U, - uint64_t *os = hash; uint64_t x = Hacl_Hash_SHA2_h384[i]; + uint64_t *os = hash; os[i] = x;); } @@ -448,12 +448,12 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA2_malloc_256(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(8U, sizeof (uint32_t)); + Hacl_Hash_SHA2_sha256_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_SHA2_sha256_init(block_state); return p; } @@ -465,10 +465,9 @@ more (different) data into the hash in each branch. 
*/ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA2_copy_256(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint32_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(8U, sizeof (uint32_t)); @@ -486,20 +485,16 @@ Reset an existing state to the initial hash state with empty data. */ void Hacl_Hash_SHA2_reset_256(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha256_init(block_state); - Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } static inline Hacl_Streaming_Types_error_code update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_32 s = *state; - uint64_t total_len = s.total_len; + uint32_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 2305843009213693951ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -515,10 +510,8 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk } if (chunk_len <= 64U - sz) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -531,22 +524,12 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -558,7 +541,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf, block_state1); + Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf, block_state); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -574,28 +557,18 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); + Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - 
.total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -605,22 +578,12 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_32 s10 = *state; - uint32_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -632,7 +595,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf, block_state1); + Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf0, block_state); } uint32_t ite; if @@ -649,18 +612,10 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); - uint8_t *dst = buf; + Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } @@ -690,10 +645,9 @@ the state and therefore does not invalidate the client-held state `p`.) 
*/ void Hacl_Hash_SHA2_digest_256(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -706,6 +660,7 @@ void Hacl_Hash_SHA2_digest_256(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint32_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -716,7 +671,6 @@ void Hacl_Hash_SHA2_digest_256(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA2_sha256_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha256_update_last(prev_len_last + (uint64_t)r, r, buf_last, tmp_block_state); @@ -761,24 +715,20 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA2_malloc_224(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(8U, sizeof (uint32_t)); + Hacl_Hash_SHA2_sha224_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_SHA2_sha224_init(block_state); return p; } void Hacl_Hash_SHA2_reset_224(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha224_init(block_state); - Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } Hacl_Streaming_Types_error_code @@ -798,10 +748,9 @@ the hash via `update_224`. 
*/ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -814,6 +763,7 @@ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint32_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -824,7 +774,6 @@ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; sha224_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha224_update_last(prev_len_last + (uint64_t)r, r, buf_last, tmp_block_state); @@ -859,12 +808,12 @@ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_malloc_512(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *block_state = (uint64_t *)KRML_HOST_CALLOC(8U, sizeof (uint64_t)); + Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 *p = (Hacl_Streaming_MD_state_64 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_64)); p[0U] = s; - Hacl_Hash_SHA2_sha512_init(block_state); return p; } @@ -876,10 +825,9 @@ more (different) data into the hash in each branch. */ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_copy_512(Hacl_Streaming_MD_state_64 *state) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint64_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint64_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); uint64_t *block_state = (uint64_t *)KRML_HOST_CALLOC(8U, sizeof (uint64_t)); @@ -894,20 +842,16 @@ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_copy_512(Hacl_Streaming_MD_state_64 * void Hacl_Hash_SHA2_reset_512(Hacl_Streaming_MD_state_64 *state) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint8_t *buf = scrut.buf; - uint64_t *block_state = scrut.block_state; + uint64_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha512_init(block_state); - Hacl_Streaming_MD_state_64 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } static inline Hacl_Streaming_Types_error_code update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_64 s = *state; - uint64_t total_len = s.total_len; + uint64_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 18446744073709551615ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -923,10 +867,8 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk } if (chunk_len <= 128U - sz) { - Hacl_Streaming_MD_state_64 s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = 
s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -939,22 +881,12 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_64 s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -966,7 +898,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf, block_state1); + Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf, block_state); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)128U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -982,28 +914,18 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); + Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 128U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_64 s1 = *state; - uint64_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)128U == 0ULL && total_len10 > 0ULL) { @@ -1013,22 +935,12 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk { sz10 = (uint32_t)(total_len10 % (uint64_t)128U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_64 s10 = *state; - uint64_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -1040,7 +952,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf, block_state1); + Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf0, block_state); } uint32_t ite; if @@ -1057,18 +969,10 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = 
chunk2 + data1_len; - Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); - uint8_t *dst = buf; + Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } @@ -1098,10 +1002,9 @@ the state and therefore does not invalidate the client-held state `p`.) */ void Hacl_Hash_SHA2_digest_512(Hacl_Streaming_MD_state_64 *state, uint8_t *output) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint64_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint64_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) { @@ -1114,6 +1017,7 @@ void Hacl_Hash_SHA2_digest_512(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint64_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 128U == 0U && r > 0U) { @@ -1124,7 +1028,6 @@ void Hacl_Hash_SHA2_digest_512(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu ite = r % 128U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA2_sha512_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(prev_len_last), @@ -1173,24 +1076,20 @@ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_malloc_384(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *block_state = (uint64_t *)KRML_HOST_CALLOC(8U, sizeof (uint64_t)); + Hacl_Hash_SHA2_sha384_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 *p = (Hacl_Streaming_MD_state_64 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_64)); p[0U] = s; - Hacl_Hash_SHA2_sha384_init(block_state); return p; } void Hacl_Hash_SHA2_reset_384(Hacl_Streaming_MD_state_64 *state) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint8_t *buf = scrut.buf; - uint64_t *block_state = scrut.block_state; + uint64_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha384_init(block_state); - Hacl_Streaming_MD_state_64 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; } Hacl_Streaming_Types_error_code @@ -1210,10 +1109,9 @@ the hash via `update_384`. 
*/ void Hacl_Hash_SHA2_digest_384(Hacl_Streaming_MD_state_64 *state, uint8_t *output) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint64_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint64_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) { @@ -1226,6 +1124,7 @@ void Hacl_Hash_SHA2_digest_384(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint64_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 128U == 0U && r > 0U) { @@ -1236,7 +1135,6 @@ void Hacl_Hash_SHA2_digest_384(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu ite = r % 128U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA2_sha384_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(prev_len_last), diff --git a/src/Hacl_Hash_SHA3.c b/src/Hacl_Hash_SHA3.c index 89bb0491..9cf5abb3 100644 --- a/src/Hacl_Hash_SHA3.c +++ b/src/Hacl_Hash_SHA3.c @@ -2166,7 +2166,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s) Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -2191,14 +2191,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index 131c34e6..e0bb7e0b 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -5992,12 +5992,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. 
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -6038,15 +6038,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -6378,7 +6378,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/src/Hacl_K256_ECDSA.c b/src/Hacl_K256_ECDSA.c index 0b72b166..07096b1e 100644 --- a/src/Hacl_K256_ECDSA.c +++ b/src/Hacl_K256_ECDSA.c @@ -30,34 +30,32 @@ #include "internal/Hacl_Bignum_K256.h" #include "internal/Hacl_Bignum_Base.h" -static inline uint64_t -bn_add(uint32_t aLen, uint64_t *a, uint32_t bLen, uint64_t *b, uint64_t *res) +static inline uint64_t bn_add_sa(uint32_t aLen, uint32_t bLen, uint64_t *b, uint64_t *res) { - uint64_t *a0 = a; uint64_t *res0 = res; uint64_t c0 = 0ULL; for (uint32_t i = 0U; i < bLen / 4U; i++) { - uint64_t t1 = a0[4U * i]; + uint64_t t1 = res0[4U * i]; uint64_t t20 = b[4U * i]; uint64_t *res_i0 = res0 + 4U * i; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); - uint64_t t10 = a0[4U * i + 1U]; + uint64_t t10 = res0[4U * i + 1U]; uint64_t t21 = b[4U * i + 1U]; uint64_t *res_i1 = res0 + 4U * i + 1U; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); - uint64_t t11 = a0[4U * i + 2U]; + uint64_t t11 = res0[4U * i + 2U]; uint64_t t22 = b[4U * i + 2U]; uint64_t *res_i2 = res0 + 4U * i + 2U; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); - uint64_t t12 = a0[4U * i + 3U]; + uint64_t t12 = res0[4U * i + 3U]; uint64_t t2 = b[4U * i + 3U]; uint64_t *res_i = res0 + 4U * i + 3U; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); } for (uint32_t i = bLen / 4U * 4U; i < bLen; i++) { - uint64_t t1 = a0[i]; + uint64_t t1 = res0[i]; uint64_t t2 = b[i]; uint64_t *res_i = res0 + i; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t2, res_i); @@ -65,27 +63,26 @@ bn_add(uint32_t aLen, uint64_t *a, uint32_t bLen, uint64_t *b, uint64_t *res) uint64_t c00 = c0; if (bLen < aLen) { - uint64_t *a1 = a + bLen; uint64_t *res1 = res + bLen; uint64_t c = c00; for (uint32_t i = 0U; i < (aLen - bLen) / 4U; i++) { - uint64_t 
t1 = a1[4U * i]; + uint64_t t1 = res1[4U * i]; uint64_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i0); - uint64_t t10 = a1[4U * i + 1U]; + uint64_t t10 = res1[4U * i + 1U]; uint64_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, 0ULL, res_i1); - uint64_t t11 = a1[4U * i + 2U]; + uint64_t t11 = res1[4U * i + 2U]; uint64_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i2); - uint64_t t12 = a1[4U * i + 3U]; + uint64_t t12 = res1[4U * i + 3U]; uint64_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, 0ULL, res_i); } for (uint32_t i = (aLen - bLen) / 4U * 4U; i < aLen - bLen; i++) { - uint64_t t1 = a1[i]; + uint64_t t1 = res1[i]; uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i); } @@ -167,8 +164,8 @@ static void add_mod4(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x;); } @@ -221,8 +218,8 @@ static void sub_mod4(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x;); } @@ -261,8 +258,8 @@ static void sqr4(uint64_t *a, uint64_t *res) 0U, 4U, 1U, - uint64_t *ab = a; uint64_t a_j = a[i0]; + uint64_t *ab = a; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -288,7 +285,12 @@ static void sqr4(uint64_t *a, uint64_t *res) } uint64_t r = c; res[i0 + i0] = r;); - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, res, res); + uint64_t a_copy0[8U] = { 0U }; + uint64_t b_copy0[8U] = { 0U }; + memcpy(a_copy0, res, 8U * sizeof (uint64_t)); + memcpy(b_copy0, res, 8U * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint64_t tmp[8U] = { 0U }; KRML_MAYBE_FOR4(i, @@ -300,7 +302,12 @@ static void sqr4(uint64_t *a, uint64_t *res) uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, tmp, res); + uint64_t a_copy[8U] = { 0U }; + uint64_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint64_t)); + memcpy(b_copy, tmp, 8U * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -339,9 +346,9 @@ static inline uint64_t load_qelem_check(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = f; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = f; os[i] = x;); uint64_t is_zero = is_qelem_zero(f); uint64_t acc = 0ULL; @@ -362,9 +369,9 @@ static inline bool load_qelem_vartime(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = f; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = f; os[i] = x;); bool is_zero = is_qelem_zero_vartime(f); uint64_t a0 = f[0U]; @@ -412,8 +419,8 @@ static inline void modq_short(uint64_t *out, uint64_t *a) 0U, 4U, 1U, - uint64_t *os = out; uint64_t x = (mask & out[i]) | (~mask & a[i]); + uint64_t *os = out; os[i] = x;); } @@ -424,9 +431,9 @@ static inline void load_qelem_modq(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = f; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = f; os[i] = x;); memcpy(tmp, f, 4U * sizeof (uint64_t)); 
modq_short(f, tmp); @@ -494,8 +501,8 @@ mul_pow2_256_minus_q_add( uint64_t r = c; tmp[len + i0] = r;); memcpy(res + 2U, a, len * sizeof (uint64_t)); - bn_add(resLen, res, len + 2U, tmp, res); - uint64_t c = bn_add(resLen, res, 4U, e, res); + bn_add_sa(resLen, len + 2U, tmp, res); + uint64_t c = bn_add_sa(resLen, 4U, e, res); return c; } @@ -510,9 +517,15 @@ static inline void modq(uint64_t *out, uint64_t *a) uint64_t *t01 = tmp; uint64_t m[7U] = { 0U }; uint64_t p[5U] = { 0U }; - mul_pow2_256_minus_q_add(4U, 7U, t01, a + 4U, a, m); - mul_pow2_256_minus_q_add(3U, 5U, t01, m + 4U, m, p); - uint64_t c2 = mul_pow2_256_minus_q_add(1U, 4U, t01, p + 4U, p, r); + uint64_t *a0 = a; + uint64_t *a1 = a + 4U; + mul_pow2_256_minus_q_add(4U, 7U, t01, a1, a0, m); + uint64_t *m0 = m; + uint64_t *m1 = m + 4U; + mul_pow2_256_minus_q_add(3U, 5U, t01, m1, m0, p); + uint64_t *p0 = p; + uint64_t *p1 = p + 4U; + uint64_t c2 = mul_pow2_256_minus_q_add(1U, 4U, t01, p1, p0, r); uint64_t c0 = c2; uint64_t c1 = add4(r, tmp, out); uint64_t mask = 0ULL - (c0 + c1); @@ -520,8 +533,8 @@ static inline void modq(uint64_t *out, uint64_t *a) 0U, 4U, 1U, - uint64_t *os = out; uint64_t x = (mask & out[i]) | (~mask & r[i]); + uint64_t *os = out; os[i] = x;); } @@ -607,8 +620,8 @@ static inline void qmul_shift_384(uint64_t *res, uint64_t *a, uint64_t *b) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (mask & res[i]) | (~mask & res_b_padded[i]); + uint64_t *os = res; os[i] = x;); } @@ -649,68 +662,130 @@ static inline void qinv(uint64_t *out, uint64_t *f) uint64_t x8[4U] = { 0U }; uint64_t x14[4U] = { 0U }; qsquare_times(x6, x_1101, 2U); - qmul(x6, x6, x_1011); + uint64_t f1_copy0[4U] = { 0U }; + memcpy(f1_copy0, x6, 4U * sizeof (uint64_t)); + qmul(x6, f1_copy0, x_1011); qsquare_times(x8, x6, 2U); - qmul(x8, x8, x_11); + uint64_t f1_copy1[4U] = { 0U }; + memcpy(f1_copy1, x8, 4U * sizeof (uint64_t)); + qmul(x8, f1_copy1, x_11); qsquare_times(x14, x8, 6U); - qmul(x14, x14, x6); + uint64_t f1_copy2[4U] = { 0U }; + memcpy(f1_copy2, x14, 4U * sizeof (uint64_t)); + qmul(x14, f1_copy2, x6); uint64_t x56[4U] = { 0U }; qsquare_times(out, x14, 14U); - qmul(out, out, x14); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy, x14); qsquare_times(x56, out, 28U); - qmul(x56, x56, out); + uint64_t f1_copy3[4U] = { 0U }; + memcpy(f1_copy3, x56, 4U * sizeof (uint64_t)); + qmul(x56, f1_copy3, out); qsquare_times(out, x56, 56U); - qmul(out, out, x56); + uint64_t f1_copy4[4U] = { 0U }; + memcpy(f1_copy4, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy4, x56); qsquare_times_in_place(out, 14U); - qmul(out, out, x14); + uint64_t f1_copy5[4U] = { 0U }; + memcpy(f1_copy5, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy5, x14); qsquare_times_in_place(out, 3U); - qmul(out, out, x_101); + uint64_t f1_copy6[4U] = { 0U }; + memcpy(f1_copy6, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy6, x_101); qsquare_times_in_place(out, 4U); - qmul(out, out, x_111); + uint64_t f1_copy7[4U] = { 0U }; + memcpy(f1_copy7, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy7, x_111); qsquare_times_in_place(out, 4U); - qmul(out, out, x_101); + uint64_t f1_copy8[4U] = { 0U }; + memcpy(f1_copy8, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy8, x_101); qsquare_times_in_place(out, 5U); - qmul(out, out, x_1011); + uint64_t f1_copy9[4U] = { 0U }; + memcpy(f1_copy9, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy9, x_1011); qsquare_times_in_place(out, 4U); - qmul(out, out, x_1011); + uint64_t f1_copy10[4U] = { 0U }; + 
memcpy(f1_copy10, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy10, x_1011); qsquare_times_in_place(out, 4U); - qmul(out, out, x_111); + uint64_t f1_copy11[4U] = { 0U }; + memcpy(f1_copy11, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy11, x_111); qsquare_times_in_place(out, 5U); - qmul(out, out, x_111); + uint64_t f1_copy12[4U] = { 0U }; + memcpy(f1_copy12, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy12, x_111); qsquare_times_in_place(out, 6U); - qmul(out, out, x_1101); + uint64_t f1_copy13[4U] = { 0U }; + memcpy(f1_copy13, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy13, x_1101); qsquare_times_in_place(out, 4U); - qmul(out, out, x_101); + uint64_t f1_copy14[4U] = { 0U }; + memcpy(f1_copy14, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy14, x_101); qsquare_times_in_place(out, 3U); - qmul(out, out, x_111); + uint64_t f1_copy15[4U] = { 0U }; + memcpy(f1_copy15, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy15, x_111); qsquare_times_in_place(out, 5U); - qmul(out, out, x_1001); + uint64_t f1_copy16[4U] = { 0U }; + memcpy(f1_copy16, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy16, x_1001); qsquare_times_in_place(out, 6U); - qmul(out, out, x_101); + uint64_t f1_copy17[4U] = { 0U }; + memcpy(f1_copy17, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy17, x_101); qsquare_times_in_place(out, 10U); - qmul(out, out, x_111); + uint64_t f1_copy18[4U] = { 0U }; + memcpy(f1_copy18, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy18, x_111); qsquare_times_in_place(out, 4U); - qmul(out, out, x_111); + uint64_t f1_copy19[4U] = { 0U }; + memcpy(f1_copy19, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy19, x_111); qsquare_times_in_place(out, 9U); - qmul(out, out, x8); + uint64_t f1_copy20[4U] = { 0U }; + memcpy(f1_copy20, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy20, x8); qsquare_times_in_place(out, 5U); - qmul(out, out, x_1001); + uint64_t f1_copy21[4U] = { 0U }; + memcpy(f1_copy21, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy21, x_1001); qsquare_times_in_place(out, 6U); - qmul(out, out, x_1011); + uint64_t f1_copy22[4U] = { 0U }; + memcpy(f1_copy22, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy22, x_1011); qsquare_times_in_place(out, 4U); - qmul(out, out, x_1101); + uint64_t f1_copy23[4U] = { 0U }; + memcpy(f1_copy23, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy23, x_1101); qsquare_times_in_place(out, 5U); - qmul(out, out, x_11); + uint64_t f1_copy24[4U] = { 0U }; + memcpy(f1_copy24, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy24, x_11); qsquare_times_in_place(out, 6U); - qmul(out, out, x_1101); + uint64_t f1_copy25[4U] = { 0U }; + memcpy(f1_copy25, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy25, x_1101); qsquare_times_in_place(out, 10U); - qmul(out, out, x_1101); + uint64_t f1_copy26[4U] = { 0U }; + memcpy(f1_copy26, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy26, x_1101); qsquare_times_in_place(out, 4U); - qmul(out, out, x_1001); + uint64_t f1_copy27[4U] = { 0U }; + memcpy(f1_copy27, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy27, x_1001); qsquare_times_in_place(out, 6U); - qmul(out, out, f); + uint64_t f1_copy28[4U] = { 0U }; + memcpy(f1_copy28, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy28, f); qsquare_times_in_place(out, 8U); - qmul(out, out, x6); + uint64_t f1_copy29[4U] = { 0U }; + memcpy(f1_copy29, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy29, x6); } void Hacl_Impl_K256_Point_make_point_at_inf(uint64_t *p) @@ -735,8 +810,12 @@ static inline void to_aff_point(uint64_t *p_aff, uint64_t *p) Hacl_Impl_K256_Finv_finv(zinv, 
z1); Hacl_K256_Field_fmul(x, x1, zinv); Hacl_K256_Field_fmul(y, y1, zinv); - Hacl_K256_Field_fnormalize(x, x); - Hacl_K256_Field_fnormalize(y, y); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, x, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(x, f_copy); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y, f_copy0); } static inline void to_aff_point_x(uint64_t *x, uint64_t *p) @@ -746,7 +825,9 @@ static inline void to_aff_point_x(uint64_t *x, uint64_t *p) uint64_t zinv[5U] = { 0U }; Hacl_Impl_K256_Finv_finv(zinv, z1); Hacl_K256_Field_fmul(x, x1, zinv); - Hacl_K256_Field_fnormalize(x, x); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, x, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(x, f_copy); } static inline bool is_on_curve_vartime(uint64_t *p) @@ -761,12 +842,20 @@ static inline bool is_on_curve_vartime(uint64_t *p) b[3U] = 0ULL; b[4U] = 0ULL; Hacl_K256_Field_fsqr(y2_exp, x); - Hacl_K256_Field_fmul(y2_exp, y2_exp, x); - Hacl_K256_Field_fadd(y2_exp, y2_exp, b); - Hacl_K256_Field_fnormalize(y2_exp, y2_exp); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, y2_exp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(y2_exp, f1_copy, x); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, y2_exp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y2_exp, f1_copy0, b); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y2_exp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2_exp, f_copy0); uint64_t y2_comp[5U] = { 0U }; Hacl_K256_Field_fsqr(y2_comp, y); - Hacl_K256_Field_fnormalize(y2_comp, y2_comp); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, y2_comp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2_comp, f_copy); bool res = Hacl_K256_Field_is_felem_eq_vartime(y2_exp, y2_comp); bool res0 = res; return res0; @@ -810,14 +899,18 @@ void Hacl_Impl_K256_Point_point_negate(uint64_t *out, uint64_t *p) oy[2U] = f2; oy[3U] = f3; oy[4U] = f4; - Hacl_K256_Field_fnormalize_weak(oy, oy); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, oy, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(oy, f_copy); } static inline void point_negate_conditional_vartime(uint64_t *p, bool is_negate) { if (is_negate) { - Hacl_Impl_K256_Point_point_negate(p, p); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, p, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_Point_point_negate(p, p_copy); return; } } @@ -894,14 +987,24 @@ static inline bool aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_ b[3U] = 0ULL; b[4U] = 0ULL; Hacl_K256_Field_fsqr(y2, x); - Hacl_K256_Field_fmul(y2, y2, x); - Hacl_K256_Field_fadd(y2, y2, b); - Hacl_K256_Field_fnormalize(y2, y2); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, y2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(y2, f1_copy, x); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, y2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y2, f1_copy0, b); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2, f_copy0); Hacl_Impl_K256_Finv_fsqrt(y, y2); - Hacl_K256_Field_fnormalize(y, y); + uint64_t f_copy1[5U] = { 0U }; + memcpy(f_copy1, y, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y, f_copy1); uint64_t y2_comp[5U] = { 0U }; Hacl_K256_Field_fsqr(y2_comp, y); - Hacl_K256_Field_fnormalize(y2_comp, y2_comp); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, y2_comp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2_comp, f_copy); bool res = Hacl_K256_Field_is_felem_eq_vartime(y2, y2_comp); bool is_y_valid 
= res; bool is_y_valid0 = is_y_valid; @@ -932,22 +1035,38 @@ void Hacl_Impl_K256_PointDouble_point_double(uint64_t *out, uint64_t *p) Hacl_K256_Field_fsqr(yy, y1); Hacl_K256_Field_fsqr(zz, z1); Hacl_K256_Field_fmul_small_num(x3, x1, 2ULL); - Hacl_K256_Field_fmul(x3, x3, y1); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy, y1); Hacl_K256_Field_fmul(tmp1, yy, y1); Hacl_K256_Field_fmul(z3, tmp1, z1); - Hacl_K256_Field_fmul_small_num(z3, z3, 8ULL); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul_small_num(z3, f_copy, 8ULL); Hacl_K256_Field_fnormalize_weak(z3, z3); Hacl_K256_Field_fmul_small_num(bzz3, zz, 21ULL); Hacl_K256_Field_fnormalize_weak(bzz3, bzz3); Hacl_K256_Field_fmul_small_num(bzz9, bzz3, 3ULL); - Hacl_K256_Field_fsub(bzz9, yy, bzz9, 6ULL); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, bzz9, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsub(bzz9, yy, f2_copy, 6ULL); Hacl_K256_Field_fadd(tmp1, yy, bzz3); - Hacl_K256_Field_fmul(tmp1, bzz9, tmp1); + uint64_t f2_copy0[5U] = { 0U }; + memcpy(f2_copy0, tmp1, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(tmp1, bzz9, f2_copy0); Hacl_K256_Field_fmul(y3, yy, zz); - Hacl_K256_Field_fmul(x3, x3, bzz9); - Hacl_K256_Field_fmul_small_num(y3, y3, 168ULL); - Hacl_K256_Field_fadd(y3, tmp1, y3); - Hacl_K256_Field_fnormalize_weak(y3, y3); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy0, bzz9); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul_small_num(y3, f_copy0, 168ULL); + uint64_t f2_copy1[5U] = { 0U }; + memcpy(f2_copy1, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y3, tmp1, f2_copy1); + uint64_t f_copy1[5U] = { 0U }; + memcpy(f_copy1, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(y3, f_copy1); } void Hacl_Impl_K256_PointAdd_point_add(uint64_t *out, uint64_t *p, uint64_t *q) @@ -999,17 +1118,35 @@ void Hacl_Impl_K256_PointAdd_point_add(uint64_t *out, uint64_t *p, uint64_t *q) Hacl_K256_Field_fmul_small_num(y3, z3, 21ULL); Hacl_K256_Field_fnormalize_weak(y3, y3); Hacl_K256_Field_fmul(tmp1, xy_pairs, yy_m_bzz3); - Hacl_K256_Field_fmul(x3, x3, xz_pairs); - Hacl_K256_Field_fsub(x3, tmp1, x3, 2ULL); - Hacl_K256_Field_fnormalize_weak(x3, x3); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy, xz_pairs); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsub(x3, tmp1, f2_copy, 2ULL); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(x3, f_copy); Hacl_K256_Field_fmul(tmp1, yy_p_bzz3, yy_m_bzz3); - Hacl_K256_Field_fmul(y3, y3, xz_pairs); - Hacl_K256_Field_fadd(y3, tmp1, y3); - Hacl_K256_Field_fnormalize_weak(y3, y3); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(y3, f1_copy0, xz_pairs); + uint64_t f2_copy0[5U] = { 0U }; + memcpy(f2_copy0, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y3, tmp1, f2_copy0); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(y3, f_copy0); Hacl_K256_Field_fmul(tmp1, yz_pairs, yy_p_bzz3); - Hacl_K256_Field_fmul(z3, z3, xy_pairs); - Hacl_K256_Field_fadd(z3, tmp1, z3); - Hacl_K256_Field_fnormalize_weak(z3, z3); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, z3, 
5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(z3, f1_copy1, xy_pairs); + uint64_t f2_copy1[5U] = { 0U }; + memcpy(f2_copy1, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(z3, tmp1, f2_copy1); + uint64_t f_copy1[5U] = { 0U }; + memcpy(f_copy1, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(z3, f_copy1); } static inline void scalar_split_lambda(uint64_t *r1, uint64_t *r2, uint64_t *k) @@ -1081,7 +1218,9 @@ static inline void point_mul_lambda_inplace(uint64_t *res) beta[2U] = 0xc3434e99cf049ULL; beta[3U] = 0x7106e64479eaULL; beta[4U] = 0x7ae96a2b657cULL; - Hacl_K256_Field_fmul(rx, beta, rx); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, rx, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(rx, beta, f2_copy); } typedef struct __bool_bool_s @@ -1123,23 +1262,35 @@ void Hacl_Impl_K256_PointMul_point_mul(uint64_t *out, uint64_t *scalar, uint64_t uint64_t *t1 = table + 15U; Hacl_Impl_K256_Point_make_point_at_inf(t0); memcpy(t1, q, 15U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 15U; - Hacl_Impl_K256_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, t11, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(tmp, p_copy0); memcpy(table + (2U * i + 2U) * 15U, tmp, 15U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 15U; - Hacl_Impl_K256_PointAdd_point_add(tmp, q, t2); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, q, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp, p_copy, t2); memcpy(table + (2U * i + 3U) * 15U, tmp, 15U * sizeof (uint64_t));); Hacl_Impl_K256_Point_make_point_at_inf(out); uint64_t tmp0[15U] = { 0U }; for (uint32_t i0 = 0U; i0 < 64U; i0++) { - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy);); uint32_t k = 256U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 15U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -1151,10 +1302,12 @@ void Hacl_Impl_K256_PointMul_point_mul(uint64_t *out, uint64_t *scalar, uint64_t 0U, 15U, 1U, - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x;);); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp0); } } @@ -1171,8 +1324,8 @@ static inline void precomp_get_consttime(const uint64_t *table, uint64_t bits_l, 0U, 15U, 1U, - uint64_t *os = tmp; uint64_t x = (c & res_j[i]) | (~c & tmp[i]); + uint64_t *os = tmp; os[i] = x;);); } @@ -1231,23 +1384,41 @@ static inline void point_mul_g(uint64_t *out, uint64_t *scalar) 0U, 16U, 1U, - KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i0, + 0U, + 4U, + 1U, + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy);); uint32_t k = 64U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r4, k, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_g_pow2_192_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, 
out, tmp); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp); uint32_t k0 = 64U - 4U * i - 4U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r3, k0, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_g_pow2_128_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy0, tmp); uint32_t k1 = 64U - 4U * i - 4U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r2, k1, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_g_pow2_64_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp); + uint64_t p_copy1[15U] = { 0U }; + memcpy(p_copy1, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy1, tmp); uint32_t k2 = 64U - 4U * i - 4U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r1, k2, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp);); + uint64_t p_copy2[15U] = { 0U }; + memcpy(p_copy2, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy2, tmp);); } static inline void @@ -1275,15 +1446,20 @@ point_mul_g_double_vartime(uint64_t *out, uint64_t *scalar1, uint64_t *scalar2, uint64_t *t1 = table2 + 15U; Hacl_Impl_K256_Point_make_point_at_inf(t0); memcpy(t1, q2, 15U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 15U; - Hacl_Impl_K256_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, t11, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(tmp, p_copy0); memcpy(table2 + (2U * i + 2U) * 15U, tmp, 15U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 15U; - Hacl_Impl_K256_PointAdd_point_add(tmp, q2, t2); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, q2, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 15U, tmp, 15U * sizeof (uint64_t));); uint64_t tmp0[15U] = { 0U }; uint32_t i0 = 255U; @@ -1296,25 +1472,39 @@ point_mul_g_double_vartime(uint64_t *out, uint64_t *scalar1, uint64_t *scalar2, uint32_t bits_l320 = (uint32_t)bits_c0; const uint64_t *a_bits_l0 = table2 + bits_l320 * 15U; memcpy(tmp0, (uint64_t *)a_bits_l0, 15U * sizeof (uint64_t)); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp0); uint64_t tmp1[15U] = { 0U }; for (uint32_t i = 0U; i < 51U; i++) { - KRML_MAYBE_FOR5(i2, 0U, 5U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy0);); uint32_t k = 255U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar2, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l321 = (uint32_t)bits_l; const uint64_t *a_bits_l1 = table2 + bits_l321 * 15U; memcpy(tmp1, (uint64_t *)a_bits_l1, 15U * sizeof (uint64_t)); - 
Hacl_Impl_K256_PointAdd_point_add(out, out, tmp1); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy0, tmp1); uint32_t k0 = 255U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar1, k0, 5U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l322 = (uint32_t)bits_l0; const uint64_t *a_bits_l2 = Hacl_K256_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * 15U; memcpy(tmp1, (uint64_t *)a_bits_l2, 15U * sizeof (uint64_t)); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp1); + uint64_t p_copy1[15U] = { 0U }; + memcpy(p_copy1, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy1, tmp1); } } @@ -1338,15 +1528,20 @@ point_mul_g_double_split_lambda_table( uint64_t *t1 = table2 + 15U; Hacl_Impl_K256_Point_make_point_at_inf(t0); memcpy(t1, p2, 15U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 15U; - Hacl_Impl_K256_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, t11, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(tmp, p_copy0); memcpy(table2 + (2U * i + 2U) * 15U, tmp, 15U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 15U; - Hacl_Impl_K256_PointAdd_point_add(tmp, p2, t2); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, p2, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 15U, tmp, 15U * sizeof (uint64_t));); uint64_t tmp0[15U] = { 0U }; uint64_t tmp1[15U] = { 0U }; @@ -1365,7 +1560,9 @@ point_mul_g_double_split_lambda_table( memcpy(tmp1, (uint64_t *)a_bits_l0, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp1, is_negate2); point_mul_lambda_inplace(tmp1); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp1); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp1); uint64_t tmp10[15U] = { 0U }; uint32_t i2 = 125U; uint64_t bits_c1 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r3, i2, 5U); @@ -1380,29 +1577,46 @@ point_mul_g_double_split_lambda_table( memcpy(tmp10, (uint64_t *)a_bits_l2, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp10, is_negate4); point_mul_lambda_inplace(tmp10); - Hacl_Impl_K256_PointAdd_point_add(tmp0, tmp0, tmp10); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, tmp0, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp0, p_copy0, tmp10); + uint64_t p_copy1[15U] = { 0U }; + memcpy(p_copy1, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy1, tmp0); uint64_t tmp2[15U] = { 0U }; for (uint32_t i = 0U; i < 25U; i++) { - KRML_MAYBE_FOR5(i4, 0U, 5U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR5(i4, + 0U, + 5U, + 1U, + uint64_t p_copy2[15U] = { 0U }; + memcpy(p_copy2, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy2);); uint32_t k = 125U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r4, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l323 = (uint32_t)bits_l; const uint64_t *a_bits_l3 = table2 + bits_l323 * 15U; memcpy(tmp2, (uint64_t *)a_bits_l3, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate4); point_mul_lambda_inplace(tmp2); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + 
uint64_t p_copy2[15U] = { 0U }; + memcpy(p_copy2, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy2, tmp2); uint32_t k0 = 125U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r3, k0, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l324 = (uint32_t)bits_l0; const uint64_t *a_bits_l4 = table2 + bits_l324 * 15U; memcpy(tmp2, (uint64_t *)a_bits_l4, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate3); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy3[15U] = { 0U }; + memcpy(p_copy3, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy3, tmp2); uint32_t k1 = 125U - 5U * i - 5U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r2, k1, 5U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l325 = (uint32_t)bits_l1; const uint64_t @@ -1410,16 +1624,21 @@ point_mul_g_double_split_lambda_table( memcpy(tmp2, (uint64_t *)a_bits_l5, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate2); point_mul_lambda_inplace(tmp2); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy4[15U] = { 0U }; + memcpy(p_copy4, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy4, tmp2); uint32_t k2 = 125U - 5U * i - 5U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r1, k2, 5U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l326 = (uint32_t)bits_l2; const uint64_t *a_bits_l6 = Hacl_K256_PrecompTable_precomp_basepoint_table_w5 + bits_l326 * 15U; memcpy(tmp2, (uint64_t *)a_bits_l6, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate1); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy5[15U] = { 0U }; + memcpy(p_copy5, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy5, tmp2); } } @@ -1520,7 +1739,9 @@ static inline bool fmul_eq_vartime(uint64_t *r, uint64_t *z, uint64_t *x) { uint64_t tmp[5U] = { 0U }; Hacl_K256_Field_fmul(tmp, r, z); - Hacl_K256_Field_fnormalize(tmp, tmp); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, tmp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(tmp, f_copy); bool b = Hacl_K256_Field_is_felem_eq_vartime(tmp, x); return b; } @@ -1573,9 +1794,9 @@ Hacl_K256_ECDSA_ecdsa_sign_hashed_msg( 0U, 4U, 1U, - uint64_t *os = d_a; uint64_t uu____0 = oneq10[i]; uint64_t x = uu____0 ^ (is_b_valid0 & (d_a[i] ^ uu____0)); + uint64_t *os = d_a; os[i] = x;); uint64_t is_sk_valid = is_b_valid0; uint64_t is_b_valid = load_qelem_check(k_q, nonce); @@ -1584,9 +1805,9 @@ Hacl_K256_ECDSA_ecdsa_sign_hashed_msg( 0U, 4U, 1U, - uint64_t *os = k_q; uint64_t uu____1 = oneq1[i]; uint64_t x = uu____1 ^ (is_b_valid & (k_q[i] ^ uu____1)); + uint64_t *os = k_q; os[i] = x;); uint64_t is_nonce_valid = is_b_valid; uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid; @@ -1602,8 +1823,12 @@ Hacl_K256_ECDSA_ecdsa_sign_hashed_msg( load_qelem_modq(z, msgHash); qinv(kinv, k_q); qmul(s_q, r_q, d_a); - qadd(s_q, z, s_q); - qmul(s_q, kinv, s_q); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, s_q, 4U * sizeof (uint64_t)); + qadd(s_q, z, f2_copy); + uint64_t f2_copy0[4U] = { 0U }; + memcpy(f2_copy0, s_q, 4U * sizeof (uint64_t)); + qmul(s_q, kinv, f2_copy0); store_qelem(signature, r_q); store_qelem(signature + 32U, s_q); uint64_t is_r_zero = is_qelem_zero(r_q); @@ -1706,7 +1931,9 @@ Hacl_K256_ECDSA_ecdsa_verify_hashed_msg(uint8_t *m, uint8_t *public_key, uint8_t tmp_q[2U] = 
0xffffffebaaedcULL; tmp_q[3U] = 0xfffffffffffffULL; tmp_q[4U] = 0xffffffffffffULL; - Hacl_K256_Field_fadd(tmp_q, r_fe, tmp_q); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, tmp_q, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(tmp_q, r_fe, f2_copy); return fmul_eq_vartime(tmp_q, z, tmp_x); } return false; @@ -2059,9 +2286,9 @@ bool Hacl_K256_ECDSA_secret_to_public(uint8_t *public_key, uint8_t *private_key) 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; point_mul_g(pk, sk); @@ -2094,9 +2321,9 @@ bool Hacl_K256_ECDSA_ecdh(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; if (is_pk_valid) diff --git a/src/Hacl_MAC_Poly1305.c b/src/Hacl_MAC_Poly1305.c index 28cbca5a..fed403b3 100644 --- a/src/Hacl_MAC_Poly1305.c +++ b/src/Hacl_MAC_Poly1305.c @@ -445,6 +445,7 @@ Hacl_MAC_Poly1305_state_t *Hacl_MAC_Poly1305_malloc(uint8_t *key) uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); uint64_t *r1 = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); uint64_t *block_state = r1; + Hacl_MAC_Poly1305_poly1305_init(block_state, key); uint8_t *k_ = (uint8_t *)KRML_HOST_CALLOC(32U, sizeof (uint8_t)); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_0 = k_; @@ -453,22 +454,18 @@ Hacl_MAC_Poly1305_state_t *Hacl_MAC_Poly1305_malloc(uint8_t *key) Hacl_MAC_Poly1305_state_t *p = (Hacl_MAC_Poly1305_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_MAC_Poly1305_state_t)); p[0U] = s; - Hacl_MAC_Poly1305_poly1305_init(block_state, key); return p; } void Hacl_MAC_Poly1305_reset(Hacl_MAC_Poly1305_state_t *state, uint8_t *key) { - Hacl_MAC_Poly1305_state_t scrut = *state; - uint8_t *k_ = scrut.p_key; - uint8_t *buf = scrut.buf; - uint64_t *block_state = scrut.block_state; + uint64_t *block_state = (*state).block_state; + uint8_t *k_ = (*state).p_key; Hacl_MAC_Poly1305_poly1305_init(block_state, key); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_1 = k_; - Hacl_MAC_Poly1305_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U, .p_key = k_1 }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; + state->p_key = k_1; } /** @@ -477,8 +474,8 @@ void Hacl_MAC_Poly1305_reset(Hacl_MAC_Poly1305_state_t *state, uint8_t *key) Hacl_Streaming_Types_error_code Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_MAC_Poly1305_state_t s = *state; - uint64_t total_len = s.total_len; + uint64_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -494,11 +491,9 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 } if (chunk_len <= 16U - sz) { - Hacl_MAC_Poly1305_state_t s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)16U == 0ULL && total_len1 > 0ULL) { @@ -511,24 +506,14 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, 
chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2, - .p_key = k_1 - } - ); + state->total_len = total_len2; + state->p_key = k_1; } else if (sz == 0U) { - Hacl_MAC_Poly1305_state_t s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)16U == 0ULL && total_len1 > 0ULL) { @@ -540,7 +525,7 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 16U, buf); + poly1305_update(block_state, 16U, buf); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)16U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -556,30 +541,20 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - poly1305_update(block_state1, data1_len, data1); + poly1305_update(block_state, data1_len, data1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len, - .p_key = k_1 - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; + state->p_key = k_1; } else { uint32_t diff = 16U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_MAC_Poly1305_state_t s1 = *state; - uint64_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz10; if (total_len10 % (uint64_t)16U == 0ULL && total_len10 > 0ULL) { @@ -589,24 +564,14 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 { sz10 = (uint32_t)(total_len10 % (uint64_t)16U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2, - .p_key = k_1 - } - ); - Hacl_MAC_Poly1305_state_t s10 = *state; - uint64_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; - uint8_t *k_10 = s10.p_key; + state->total_len = total_len2; + state->p_key = k_1; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_10 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)16U == 0ULL && total_len1 > 0ULL) { @@ -618,7 +583,7 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 16U, buf); + poly1305_update(block_state, 16U, buf0); } uint32_t ite; if @@ -635,30 +600,21 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - poly1305_update(block_state1, data1_len, data1); - uint8_t *dst = buf; + poly1305_update(block_state, data1_len, data1); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * 
sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff), - .p_key = k_10 - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); + state->p_key = k_10; } return Hacl_Streaming_Types_Success; } void Hacl_MAC_Poly1305_digest(Hacl_MAC_Poly1305_state_t *state, uint8_t *output) { - Hacl_MAC_Poly1305_state_t scrut = *state; - uint64_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; - uint8_t *k_ = scrut.p_key; + uint64_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; + uint8_t *k_ = (*state).p_key; uint32_t r; if (total_len % (uint64_t)16U == 0ULL && total_len > 0ULL) { @@ -672,6 +628,7 @@ void Hacl_MAC_Poly1305_digest(Hacl_MAC_Poly1305_state_t *state, uint8_t *output) uint64_t r1[25U] = { 0U }; uint64_t *tmp_block_state = r1; memcpy(tmp_block_state, block_state, 25U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 16U == 0U && r > 0U) { @@ -682,7 +639,6 @@ void Hacl_MAC_Poly1305_digest(Hacl_MAC_Poly1305_state_t *state, uint8_t *output) ite = r % 16U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; poly1305_update(tmp_block_state, 0U, buf_multi); poly1305_update(tmp_block_state, r, buf_last); uint64_t tmp[25U] = { 0U }; diff --git a/src/Hacl_MAC_Poly1305_Simd128.c b/src/Hacl_MAC_Poly1305_Simd128.c index 17e26978..8eae2273 100644 --- a/src/Hacl_MAC_Poly1305_Simd128.c +++ b/src/Hacl_MAC_Poly1305_Simd128.c @@ -1310,6 +1310,7 @@ Hacl_MAC_Poly1305_Simd128_state_t *Hacl_MAC_Poly1305_Simd128_malloc(uint8_t *key sizeof (Lib_IntVector_Intrinsics_vec128) * 25U); memset(r1, 0U, 25U * sizeof (Lib_IntVector_Intrinsics_vec128)); Lib_IntVector_Intrinsics_vec128 *block_state = r1; + Hacl_MAC_Poly1305_Simd128_poly1305_init(block_state, key); uint8_t *k_ = (uint8_t *)KRML_HOST_CALLOC(32U, sizeof (uint8_t)); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_0 = k_; @@ -1321,22 +1322,18 @@ Hacl_MAC_Poly1305_Simd128_state_t *Hacl_MAC_Poly1305_Simd128_malloc(uint8_t *key Hacl_MAC_Poly1305_Simd128_state_t )); p[0U] = s; - Hacl_MAC_Poly1305_Simd128_poly1305_init(block_state, key); return p; } void Hacl_MAC_Poly1305_Simd128_reset(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8_t *key) { - Hacl_MAC_Poly1305_Simd128_state_t scrut = *state; - uint8_t *k_ = scrut.p_key; - uint8_t *buf = scrut.buf; - Lib_IntVector_Intrinsics_vec128 *block_state = scrut.block_state; + Lib_IntVector_Intrinsics_vec128 *block_state = (*state).block_state; + uint8_t *k_ = (*state).p_key; Hacl_MAC_Poly1305_Simd128_poly1305_init(block_state, key); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_1 = k_; - Hacl_MAC_Poly1305_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U, .p_key = k_1 }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; + state->p_key = k_1; } /** @@ -1349,8 +1346,8 @@ Hacl_MAC_Poly1305_Simd128_update( uint32_t chunk_len ) { - Hacl_MAC_Poly1305_Simd128_state_t s = *state; - uint64_t total_len = s.total_len; + Lib_IntVector_Intrinsics_vec128 *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -1366,11 +1363,9 @@ Hacl_MAC_Poly1305_Simd128_update( } if (chunk_len <= 32U - sz) { - Hacl_MAC_Poly1305_Simd128_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec128 
*block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)32U == 0ULL && total_len1 > 0ULL) { @@ -1383,24 +1378,14 @@ Hacl_MAC_Poly1305_Simd128_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2, - .p_key = k_1 - } - ); + state->total_len = total_len2; + state->p_key = k_1; } else if (sz == 0U) { - Hacl_MAC_Poly1305_Simd128_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)32U == 0ULL && total_len1 > 0ULL) { @@ -1412,7 +1397,7 @@ Hacl_MAC_Poly1305_Simd128_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 32U, buf); + poly1305_update(block_state, 32U, buf); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)32U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1428,30 +1413,20 @@ Hacl_MAC_Poly1305_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - poly1305_update(block_state1, data1_len, data1); + poly1305_update(block_state, data1_len, data1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len, - .p_key = k_1 - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; + state->p_key = k_1; } else { uint32_t diff = 32U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_MAC_Poly1305_Simd128_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz10; if (total_len10 % (uint64_t)32U == 0ULL && total_len10 > 0ULL) { @@ -1461,24 +1436,14 @@ Hacl_MAC_Poly1305_Simd128_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)32U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2, - .p_key = k_1 - } - ); - Hacl_MAC_Poly1305_Simd128_state_t s10 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; - uint8_t *k_10 = s10.p_key; + state->total_len = total_len2; + state->p_key = k_1; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_10 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)32U == 0ULL && total_len1 > 0ULL) { @@ -1490,7 +1455,7 @@ Hacl_MAC_Poly1305_Simd128_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 32U, buf); + poly1305_update(block_state, 32U, buf0); } uint32_t ite; if @@ -1507,19 +1472,11 @@ 
Hacl_MAC_Poly1305_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - poly1305_update(block_state1, data1_len, data1); - uint8_t *dst = buf; + poly1305_update(block_state, data1_len, data1); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff), - .p_key = k_10 - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); + state->p_key = k_10; } return Hacl_Streaming_Types_Success; } @@ -1527,11 +1484,10 @@ Hacl_MAC_Poly1305_Simd128_update( void Hacl_MAC_Poly1305_Simd128_digest(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8_t *output) { - Hacl_MAC_Poly1305_Simd128_state_t scrut = *state; - Lib_IntVector_Intrinsics_vec128 *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; - uint8_t *k_ = scrut.p_key; + Lib_IntVector_Intrinsics_vec128 *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; + uint8_t *k_ = (*state).p_key; uint32_t r; if (total_len % (uint64_t)32U == 0ULL && total_len > 0ULL) { @@ -1545,6 +1501,7 @@ Hacl_MAC_Poly1305_Simd128_digest(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 r1[25U] KRML_POST_ALIGN(16) = { 0U }; Lib_IntVector_Intrinsics_vec128 *tmp_block_state = r1; memcpy(tmp_block_state, block_state, 25U * sizeof (Lib_IntVector_Intrinsics_vec128)); + uint8_t *buf_multi = buf_1; uint32_t ite0; if (r % 16U == 0U && r > 0U) { @@ -1555,7 +1512,6 @@ Hacl_MAC_Poly1305_Simd128_digest(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8 ite0 = r % 16U; } uint8_t *buf_last = buf_1 + r - ite0; - uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 16U == 0U && r > 0U) { diff --git a/src/Hacl_MAC_Poly1305_Simd256.c b/src/Hacl_MAC_Poly1305_Simd256.c index f25e8fff..c5e7784b 100644 --- a/src/Hacl_MAC_Poly1305_Simd256.c +++ b/src/Hacl_MAC_Poly1305_Simd256.c @@ -1761,6 +1761,7 @@ Hacl_MAC_Poly1305_Simd256_state_t *Hacl_MAC_Poly1305_Simd256_malloc(uint8_t *key sizeof (Lib_IntVector_Intrinsics_vec256) * 25U); memset(r1, 0U, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); Lib_IntVector_Intrinsics_vec256 *block_state = r1; + Hacl_MAC_Poly1305_Simd256_poly1305_init(block_state, key); uint8_t *k_ = (uint8_t *)KRML_HOST_CALLOC(32U, sizeof (uint8_t)); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_0 = k_; @@ -1772,22 +1773,18 @@ Hacl_MAC_Poly1305_Simd256_state_t *Hacl_MAC_Poly1305_Simd256_malloc(uint8_t *key Hacl_MAC_Poly1305_Simd256_state_t )); p[0U] = s; - Hacl_MAC_Poly1305_Simd256_poly1305_init(block_state, key); return p; } void Hacl_MAC_Poly1305_Simd256_reset(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8_t *key) { - Hacl_MAC_Poly1305_Simd256_state_t scrut = *state; - uint8_t *k_ = scrut.p_key; - uint8_t *buf = scrut.buf; - Lib_IntVector_Intrinsics_vec256 *block_state = scrut.block_state; + Lib_IntVector_Intrinsics_vec256 *block_state = (*state).block_state; + uint8_t *k_ = (*state).p_key; Hacl_MAC_Poly1305_Simd256_poly1305_init(block_state, key); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_1 = k_; - Hacl_MAC_Poly1305_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U, .p_key = k_1 }; - state[0U] = tmp; + state->total_len = (uint64_t)0U; + state->p_key = k_1; } /** @@ -1800,8 +1797,8 @@ Hacl_MAC_Poly1305_Simd256_update( 
uint32_t chunk_len ) { - Hacl_MAC_Poly1305_Simd256_state_t s = *state; - uint64_t total_len = s.total_len; + Lib_IntVector_Intrinsics_vec256 *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -1817,11 +1814,9 @@ Hacl_MAC_Poly1305_Simd256_update( } if (chunk_len <= 64U - sz) { - Hacl_MAC_Poly1305_Simd256_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1834,24 +1829,14 @@ Hacl_MAC_Poly1305_Simd256_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2, - .p_key = k_1 - } - ); + state->total_len = total_len2; + state->p_key = k_1; } else if (sz == 0U) { - Hacl_MAC_Poly1305_Simd256_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1863,7 +1848,7 @@ Hacl_MAC_Poly1305_Simd256_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 64U, buf); + poly1305_update(block_state, 64U, buf); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1879,30 +1864,20 @@ Hacl_MAC_Poly1305_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - poly1305_update(block_state1, data1_len, data1); + poly1305_update(block_state, data1_len, data1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len, - .p_key = k_1 - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; + state->p_key = k_1; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_MAC_Poly1305_Simd256_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -1912,24 +1887,14 @@ Hacl_MAC_Poly1305_Simd256_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2, - .p_key = k_1 - } - ); - Hacl_MAC_Poly1305_Simd256_state_t s10 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; 
- uint8_t *k_10 = s10.p_key; + state->total_len = total_len2; + state->p_key = k_1; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_10 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1941,7 +1906,7 @@ Hacl_MAC_Poly1305_Simd256_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 64U, buf); + poly1305_update(block_state, 64U, buf0); } uint32_t ite; if @@ -1958,19 +1923,11 @@ Hacl_MAC_Poly1305_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - poly1305_update(block_state1, data1_len, data1); - uint8_t *dst = buf; + poly1305_update(block_state, data1_len, data1); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff), - .p_key = k_10 - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); + state->p_key = k_10; } return Hacl_Streaming_Types_Success; } @@ -1978,11 +1935,10 @@ Hacl_MAC_Poly1305_Simd256_update( void Hacl_MAC_Poly1305_Simd256_digest(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8_t *output) { - Hacl_MAC_Poly1305_Simd256_state_t scrut = *state; - Lib_IntVector_Intrinsics_vec256 *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; - uint8_t *k_ = scrut.p_key; + Lib_IntVector_Intrinsics_vec256 *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; + uint8_t *k_ = (*state).p_key; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -1996,6 +1952,7 @@ Hacl_MAC_Poly1305_Simd256_digest(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8 KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 r1[25U] KRML_POST_ALIGN(32) = { 0U }; Lib_IntVector_Intrinsics_vec256 *tmp_block_state = r1; memcpy(tmp_block_state, block_state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + uint8_t *buf_multi = buf_1; uint32_t ite0; if (r % 16U == 0U && r > 0U) { @@ -2006,7 +1963,6 @@ Hacl_MAC_Poly1305_Simd256_digest(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8 ite0 = r % 16U; } uint8_t *buf_last = buf_1 + r - ite0; - uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 16U == 0U && r > 0U) { diff --git a/src/Hacl_NaCl.c b/src/Hacl_NaCl.c index a1bbd25c..54cf0171 100644 --- a/src/Hacl_NaCl.c +++ b/src/Hacl_NaCl.c @@ -62,8 +62,8 @@ secretbox_detached(uint32_t mlen, uint8_t *c, uint8_t *tag, uint8_t *k, uint8_t memcpy(block0, m0, mlen0 * sizeof (uint8_t)); for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = block0; uint8_t x = (uint32_t)block0[i] ^ (uint32_t)ekey0[i]; + uint8_t *os = block0; os[i] = x; } uint8_t *c0 = c; @@ -117,8 +117,8 @@ secretbox_open_detached( memcpy(block0, c0, mlen0 * sizeof (uint8_t)); for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = block0; uint8_t x = (uint32_t)block0[i] ^ (uint32_t)ekey0[i]; + uint8_t *os = block0; os[i] = x; } uint8_t *m0 = m; diff --git a/src/Hacl_P256.c b/src/Hacl_P256.c index 609fed81..3c54af25 100644 --- a/src/Hacl_P256.c +++ b/src/Hacl_P256.c @@ -77,9 +77,9 @@ static inline void bn_cmovznz4(uint64_t *res, uint64_t cin, uint64_t *x, uint64_ 0U, 4U, 1U, - uint64_t *os = res; uint64_t uu____0 = x[i]; uint64_t x1 = uu____0 ^ (mask & (y[i] ^ uu____0)); + uint64_t *os = res; os[i] = x1;); } @@ -131,8 +131,8 @@ static inline void bn_add_mod4(uint64_t *res, uint64_t *n, 
uint64_t *x, uint64_t 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x1;); } @@ -210,8 +210,8 @@ static inline void bn_sub_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x1;); } @@ -250,8 +250,8 @@ static inline void bn_sqr4(uint64_t *res, uint64_t *x) 0U, 4U, 1U, - uint64_t *ab = x; uint64_t a_j = x[i0]; + uint64_t *ab = x; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -277,7 +277,12 @@ static inline void bn_sqr4(uint64_t *res, uint64_t *x) } uint64_t r = c; res[i0 + i0] = r;); - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, res, res); + uint64_t a_copy0[8U] = { 0U }; + uint64_t b_copy0[8U] = { 0U }; + memcpy(a_copy0, res, 8U * sizeof (uint64_t)); + memcpy(b_copy0, res, 8U * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint64_t tmp[8U] = { 0U }; KRML_MAYBE_FOR4(i, @@ -289,7 +294,12 @@ static inline void bn_sqr4(uint64_t *res, uint64_t *x) uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, tmp, res); + uint64_t a_copy[8U] = { 0U }; + uint64_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint64_t)); + memcpy(b_copy, tmp, 8U * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -306,9 +316,9 @@ static inline void bn_from_bytes_be4(uint64_t *res, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = res; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res; os[i] = x;); } @@ -394,8 +404,11 @@ static inline uint64_t bn_is_lt_prime_mask4(uint64_t *f) { uint64_t tmp[4U] = { 0U }; make_prime(tmp); - uint64_t c = bn_sub4(tmp, f, tmp); - return 0ULL - c; + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, tmp, 4U * sizeof (uint64_t)); + uint64_t c = bn_sub4(tmp, f, y_copy); + uint64_t c0 = c; + return 0ULL - c0; } static inline uint64_t feq_mask(uint64_t *a, uint64_t *b) @@ -423,7 +436,9 @@ static inline void fnegate_conditional_vartime(uint64_t *f, bool is_negate) uint64_t zero[4U] = { 0U }; if (is_negate) { - fsub0(f, zero, f); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, f, 4U * sizeof (uint64_t)); + fsub0(f, zero, y_copy); } } @@ -455,8 +470,8 @@ static inline void mont_reduction(uint64_t *res, uint64_t *x) } uint64_t r = c; uint64_t c1 = r; - uint64_t *resb = x + 4U + i0; uint64_t res_j = x[4U + i0]; + uint64_t *resb = x + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); memcpy(res, x + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = c0; @@ -486,8 +501,8 @@ static inline void mont_reduction(uint64_t *res, uint64_t *x) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x1;); } @@ -529,7 +544,9 @@ static inline void fmul_by_b_coeff(uint64_t *res, uint64_t *x) static inline void fcube(uint64_t *res, uint64_t *x) { fsqr0(res, x); - fmul0(res, res, x); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, res, 4U * sizeof (uint64_t)); + fmul0(res, x_copy, x); } static inline void finv(uint64_t *res, uint64_t *a) @@ -541,51 +558,121 @@ static inline void finv(uint64_t *res, uint64_t *a) uint64_t *tmp2 = tmp + 12U; memcpy(x2, a, 4U * sizeof 
(uint64_t)); { - fsqr0(x2, x2); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy); } - fmul0(x2, x2, a); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy, a); memcpy(x30, x2, 4U * sizeof (uint64_t)); { - fsqr0(x30, x30); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, x30, 4U * sizeof (uint64_t)); + fsqr0(x30, x_copy0); } - fmul0(x30, x30, a); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, x30, 4U * sizeof (uint64_t)); + fmul0(x30, x_copy0, a); memcpy(tmp1, x30, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, x30); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy1);); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy1, x30); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, fsqr0(tmp2, tmp2);); - fmul0(tmp2, tmp2, tmp1); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy2);); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy2, tmp1); memcpy(tmp1, tmp2, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, x30); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy3);); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy3, x30); memcpy(x30, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR15(i, 0U, 15U, 1U, fsqr0(x30, x30);); - fmul0(x30, x30, tmp1); + KRML_MAYBE_FOR15(i, + 0U, + 15U, + 1U, + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x30, 4U * sizeof (uint64_t)); + fsqr0(x30, x_copy4);); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x30, 4U * sizeof (uint64_t)); + fmul0(x30, x_copy4, tmp1); memcpy(tmp1, x30, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, x2); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy5);); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy5, x2); memcpy(x2, tmp1, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < 32U; i++) { - fsqr0(x2, x2); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy6); } - fmul0(x2, x2, a); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy6, a); for (uint32_t i = 0U; i < 128U; i++) { - fsqr0(x2, x2); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy7); } - fmul0(x2, x2, tmp1); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy7, tmp1); for (uint32_t i = 0U; i < 32U; i++) { - fsqr0(x2, x2); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy8); } - fmul0(x2, x2, tmp1); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy8, tmp1); for (uint32_t i = 0U; i < 30U; i++) { - fsqr0(x2, x2); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy9); } - fmul0(x2, x2, x30); - 
KRML_MAYBE_FOR2(i, 0U, 2U, 1U, fsqr0(x2, x2);); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy9, x30); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy10);); fmul0(tmp1, x2, a); memcpy(res, tmp1, 4U * sizeof (uint64_t)); } @@ -597,35 +684,81 @@ static inline void fsqrt(uint64_t *res, uint64_t *a) uint64_t *tmp2 = tmp + 4U; memcpy(tmp1, a, 4U * sizeof (uint64_t)); { - fsqr0(tmp1, tmp1); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy); } - fmul0(tmp1, tmp1, a); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy, a); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, fsqr0(tmp2, tmp2);); - fmul0(tmp2, tmp2, tmp1); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy0);); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy0, tmp1); memcpy(tmp1, tmp2, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, tmp2); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy1);); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy1, tmp2); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR8(i, 0U, 8U, 1U, fsqr0(tmp2, tmp2);); - fmul0(tmp2, tmp2, tmp1); + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy2);); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy2, tmp1); memcpy(tmp1, tmp2, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR16(i, 0U, 16U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, tmp2); + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy3);); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy3, tmp2); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < 32U; i++) { - fsqr0(tmp2, tmp2); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy4); } - fmul0(tmp2, tmp2, a); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy4, a); for (uint32_t i = 0U; i < 96U; i++) { - fsqr0(tmp2, tmp2); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy5); } - fmul0(tmp2, tmp2, a); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy5, a); for (uint32_t i = 0U; i < 94U; i++) { - fsqr0(tmp2, tmp2); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy6); } memcpy(res, tmp2, 4U * sizeof (uint64_t)); } @@ -667,8 +800,12 @@ static inline void to_aff_point(uint64_t *res, uint64_t *p) finv(zinv, pz); fmul0(x, px, zinv); fmul0(y, py, zinv); - from_mont(x, x); - from_mont(y, y); + uint64_t a_copy[4U] = { 0U }; + memcpy(a_copy, x, 4U * sizeof (uint64_t)); + from_mont(x, a_copy); + uint64_t a_copy0[4U] = { 0U }; + memcpy(a_copy0, y, 4U * sizeof (uint64_t)); + from_mont(y, a_copy0); } static 
inline void to_aff_point_x(uint64_t *res, uint64_t *p) @@ -678,7 +815,9 @@ static inline void to_aff_point_x(uint64_t *res, uint64_t *p) uint64_t *pz = p + 8U; finv(zinv, pz); fmul0(res, px, zinv); - from_mont(res, res); + uint64_t a_copy[4U] = { 0U }; + memcpy(a_copy, res, 4U * sizeof (uint64_t)); + from_mont(res, a_copy); } static inline void to_proj_point(uint64_t *res, uint64_t *p) @@ -705,11 +844,19 @@ static inline bool is_on_curve_vartime(uint64_t *p) uint64_t tmp[4U] = { 0U }; fcube(rp, tx); make_a_coeff(tmp); - fmul0(tmp, tmp, tx); - fadd0(rp, tmp, rp); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp, 4U * sizeof (uint64_t)); + fmul0(tmp, x_copy, tx); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, rp, 4U * sizeof (uint64_t)); + fadd0(rp, tmp, y_copy); make_b_coeff(tmp); - fadd0(rp, tmp, rp); - fsqr0(ty, ty); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, rp, 4U * sizeof (uint64_t)); + fadd0(rp, tmp, y_copy0); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, ty, 4U * sizeof (uint64_t)); + fsqr0(ty, x_copy0); uint64_t r = feq_mask(ty, rp); bool r0 = r == 0xFFFFFFFFFFFFFFFFULL; return r0; @@ -785,13 +932,21 @@ static inline bool aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_ uint64_t tmp[4U] = { 0U }; fcube(y2M, xM); make_a_coeff(tmp); - fmul0(tmp, tmp, xM); - fadd0(y2M, tmp, y2M); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp, 4U * sizeof (uint64_t)); + fmul0(tmp, x_copy, xM); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, y2M, 4U * sizeof (uint64_t)); + fadd0(y2M, tmp, y_copy); make_b_coeff(tmp); - fadd0(y2M, tmp, y2M); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, y2M, 4U * sizeof (uint64_t)); + fadd0(y2M, tmp, y_copy0); fsqrt(yM, y2M); from_mont(y, yM); - fsqr0(yM, yM); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, yM, 4U * sizeof (uint64_t)); + fsqr0(yM, x_copy0); uint64_t r = feq_mask(yM, y2M); bool is_y_valid = r == 0xFFFFFFFFFFFFFFFFULL; bool is_y_valid0 = is_y_valid; @@ -808,8 +963,6 @@ static inline bool aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_ static inline void point_double(uint64_t *res, uint64_t *p) { uint64_t tmp[20U] = { 0U }; - uint64_t *x = p; - uint64_t *z = p + 8U; uint64_t *x3 = res; uint64_t *y3 = res + 4U; uint64_t *z3 = res + 8U; @@ -818,43 +971,103 @@ static inline void point_double(uint64_t *res, uint64_t *p) uint64_t *t2 = tmp + 8U; uint64_t *t3 = tmp + 12U; uint64_t *t4 = tmp + 16U; - uint64_t *x1 = p; + uint64_t *x = p; uint64_t *y = p + 4U; - uint64_t *z1 = p + 8U; - fsqr0(t0, x1); + uint64_t *z0 = p + 8U; + fsqr0(t0, x); fsqr0(t1, y); - fsqr0(t2, z1); - fmul0(t3, x1, y); - fadd0(t3, t3, t3); - fmul0(t4, y, z1); - fmul0(z3, x, z); - fadd0(z3, z3, z3); + fsqr0(t2, z0); + fmul0(t3, x, y); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, t3, 4U * sizeof (uint64_t)); + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, x_copy, 4U * sizeof (uint64_t)); + fadd0(t3, x_copy10, x_copy); + fmul0(t4, y, z0); + uint64_t *x0 = p; + uint64_t *z = p + 8U; + fmul0(z3, x0, z); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, z3, 4U * sizeof (uint64_t)); + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, x_copy0, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy11, x_copy0); fmul_by_b_coeff(y3, t2); - fsub0(y3, y3, z3); - fadd0(x3, y3, y3); - fadd0(y3, x3, y3); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x_copy2, z3); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, y3, 4U * sizeof (uint64_t)); + fadd0(x3, x_copy3, y3); + uint64_t y_copy[4U] = { 0U }; + 
memcpy(y_copy, y3, 4U * sizeof (uint64_t)); + fadd0(y3, x3, y_copy); fsub0(x3, t1, y3); - fadd0(y3, t1, y3); - fmul0(y3, x3, y3); - fmul0(x3, x3, t3); - fadd0(t3, t2, t2); - fadd0(t2, t2, t3); - fmul_by_b_coeff(z3, z3); - fsub0(z3, z3, t2); - fsub0(z3, z3, t0); - fadd0(t3, z3, z3); - fadd0(z3, z3, t3); - fadd0(t3, t0, t0); - fadd0(t0, t3, t0); - fsub0(t0, t0, t2); - fmul0(t0, t0, z3); - fadd0(y3, y3, t0); - fadd0(t0, t4, t4); - fmul0(z3, t0, z3); - fsub0(x3, x3, z3); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, y3, 4U * sizeof (uint64_t)); + fadd0(y3, t1, y_copy0); + uint64_t y_copy1[4U] = { 0U }; + memcpy(y_copy1, y3, 4U * sizeof (uint64_t)); + fmul0(y3, x3, y_copy1); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x3, 4U * sizeof (uint64_t)); + fmul0(x3, x_copy4, t3); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, t2, 4U * sizeof (uint64_t)); + fadd0(t3, x_copy5, t2); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, t2, 4U * sizeof (uint64_t)); + fadd0(t2, x_copy6, t3); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, z3, 4U * sizeof (uint64_t)); + fmul_by_b_coeff(z3, x_copy7); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, z3, 4U * sizeof (uint64_t)); + fsub0(z3, x_copy8, t2); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, z3, 4U * sizeof (uint64_t)); + fsub0(z3, x_copy9, t0); + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, z3, 4U * sizeof (uint64_t)); + fadd0(t3, x_copy12, z3); + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy13, t3); + uint64_t x_copy14[4U] = { 0U }; + memcpy(x_copy14, t0, 4U * sizeof (uint64_t)); + fadd0(t3, x_copy14, t0); + uint64_t y_copy2[4U] = { 0U }; + memcpy(y_copy2, t0, 4U * sizeof (uint64_t)); + fadd0(t0, t3, y_copy2); + uint64_t x_copy15[4U] = { 0U }; + memcpy(x_copy15, t0, 4U * sizeof (uint64_t)); + fsub0(t0, x_copy15, t2); + uint64_t x_copy16[4U] = { 0U }; + memcpy(x_copy16, t0, 4U * sizeof (uint64_t)); + fmul0(t0, x_copy16, z3); + uint64_t x_copy17[4U] = { 0U }; + memcpy(x_copy17, y3, 4U * sizeof (uint64_t)); + fadd0(y3, x_copy17, t0); + uint64_t x_copy18[4U] = { 0U }; + memcpy(x_copy18, t4, 4U * sizeof (uint64_t)); + fadd0(t0, x_copy18, t4); + uint64_t y_copy3[4U] = { 0U }; + memcpy(y_copy3, z3, 4U * sizeof (uint64_t)); + fmul0(z3, t0, y_copy3); + uint64_t x_copy19[4U] = { 0U }; + memcpy(x_copy19, x3, 4U * sizeof (uint64_t)); + fsub0(x3, x_copy19, z3); fmul0(z3, t0, t1); - fadd0(z3, z3, z3); - fadd0(z3, z3, z3); + uint64_t x_copy20[4U] = { 0U }; + memcpy(x_copy20, z3, 4U * sizeof (uint64_t)); + uint64_t x_copy110[4U] = { 0U }; + memcpy(x_copy110, x_copy20, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy110, x_copy20); + uint64_t x_copy21[4U] = { 0U }; + memcpy(x_copy21, z3, 4U * sizeof (uint64_t)); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, x_copy21, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy1, x_copy21); } static inline void point_add(uint64_t *res, uint64_t *p, uint64_t *q) @@ -882,52 +1095,100 @@ static inline void point_add(uint64_t *res, uint64_t *p, uint64_t *q) fmul0(t2, z10, z20); fadd0(t3, x1, y1); fadd0(t4, x20, y20); - fmul0(t3, t3, t4); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, t3, 4U * sizeof (uint64_t)); + fmul0(t3, x_copy0, t4); fadd0(t4, t01, t11); uint64_t *y10 = p + 4U; uint64_t *z11 = p + 8U; uint64_t *y2 = q + 4U; uint64_t *z21 = q + 8U; - fsub0(t3, t3, t4); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, t3, 4U * sizeof (uint64_t)); + fsub0(t3, x_copy, t4); fadd0(t4, y10, z11); fadd0(t5, y2, z21); - fmul0(t4, t4, t5); + uint64_t 
x_copy1[4U] = { 0U }; + memcpy(x_copy1, t4, 4U * sizeof (uint64_t)); + fmul0(t4, x_copy1, t5); fadd0(t5, t11, t2); - fsub0(t4, t4, t5); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, t4, 4U * sizeof (uint64_t)); + fsub0(t4, x_copy2, t5); uint64_t *x10 = p; uint64_t *z1 = p + 8U; uint64_t *x2 = q; uint64_t *z2 = q + 8U; fadd0(x3, x10, z1); fadd0(y3, x2, z2); - fmul0(x3, x3, y3); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, x3, 4U * sizeof (uint64_t)); + fmul0(x3, x_copy3, y3); fadd0(y3, t01, t2); - fsub0(y3, x3, y3); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x3, y_copy); fmul_by_b_coeff(z3, t2); fsub0(x3, y3, z3); - fadd0(z3, x3, x3); - fadd0(x3, x3, z3); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy4, x3); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, x3, 4U * sizeof (uint64_t)); + fadd0(x3, x_copy5, z3); fsub0(z3, t11, x3); - fadd0(x3, t11, x3); - fmul_by_b_coeff(y3, y3); - fadd0(t11, t2, t2); - fadd0(t2, t11, t2); - fsub0(y3, y3, t2); - fsub0(y3, y3, t01); - fadd0(t11, y3, y3); - fadd0(y3, t11, y3); - fadd0(t11, t01, t01); - fadd0(t01, t11, t01); - fsub0(t01, t01, t2); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, x3, 4U * sizeof (uint64_t)); + fadd0(x3, t11, y_copy0); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, y3, 4U * sizeof (uint64_t)); + fmul_by_b_coeff(y3, x_copy6); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, t2, 4U * sizeof (uint64_t)); + fadd0(t11, x_copy7, t2); + uint64_t y_copy1[4U] = { 0U }; + memcpy(y_copy1, t2, 4U * sizeof (uint64_t)); + fadd0(t2, t11, y_copy1); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x_copy8, t2); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x_copy9, t01); + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, y3, 4U * sizeof (uint64_t)); + fadd0(t11, x_copy10, y3); + uint64_t y_copy2[4U] = { 0U }; + memcpy(y_copy2, y3, 4U * sizeof (uint64_t)); + fadd0(y3, t11, y_copy2); + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, t01, 4U * sizeof (uint64_t)); + fadd0(t11, x_copy11, t01); + uint64_t y_copy3[4U] = { 0U }; + memcpy(y_copy3, t01, 4U * sizeof (uint64_t)); + fadd0(t01, t11, y_copy3); + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, t01, 4U * sizeof (uint64_t)); + fsub0(t01, x_copy12, t2); fmul0(t11, t4, y3); fmul0(t2, t01, y3); fmul0(y3, x3, z3); - fadd0(y3, y3, t2); - fmul0(x3, t3, x3); - fsub0(x3, x3, t11); - fmul0(z3, t4, z3); + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, y3, 4U * sizeof (uint64_t)); + fadd0(y3, x_copy13, t2); + uint64_t y_copy4[4U] = { 0U }; + memcpy(y_copy4, x3, 4U * sizeof (uint64_t)); + fmul0(x3, t3, y_copy4); + uint64_t x_copy14[4U] = { 0U }; + memcpy(x_copy14, x3, 4U * sizeof (uint64_t)); + fsub0(x3, x_copy14, t11); + uint64_t y_copy5[4U] = { 0U }; + memcpy(y_copy5, z3, 4U * sizeof (uint64_t)); + fmul0(z3, t4, y_copy5); fmul0(t11, t3, t01); - fadd0(z3, z3, t11); + uint64_t x_copy15[4U] = { 0U }; + memcpy(x_copy15, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy15, t11); memcpy(res, t1, 12U * sizeof (uint64_t)); } @@ -939,23 +1200,35 @@ static inline void point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) uint64_t *t1 = table + 12U; make_point_at_inf(t0); memcpy(t1, p, 12U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 12U; - point_double(tmp, t11); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, t11, 12U * 
sizeof (uint64_t)); + point_double(tmp, p_copy0); memcpy(table + (2U * i + 2U) * 12U, tmp, 12U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 12U; - point_add(tmp, p, t2); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, p, 12U * sizeof (uint64_t)); + point_add(tmp, p_copy, t2); memcpy(table + (2U * i + 3U) * 12U, tmp, 12U * sizeof (uint64_t));); make_point_at_inf(res); uint64_t tmp0[12U] = { 0U }; for (uint32_t i0 = 0U; i0 < 64U; i0++) { - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, point_double(res, res);); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_double(res, p_copy);); uint32_t k = 256U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 12U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -967,10 +1240,12 @@ static inline void point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) 0U, 12U, 1U, - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x;);); - point_add(res, res, tmp0); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy, tmp0); } } @@ -987,8 +1262,8 @@ static inline void precomp_get_consttime(const uint64_t *table, uint64_t bits_l, 0U, 12U, 1U, - uint64_t *os = tmp; uint64_t x = (c & res_j[i]) | (~c & tmp[i]); + uint64_t *os = tmp; os[i] = x;);); } @@ -1030,23 +1305,41 @@ static inline void point_mul_g(uint64_t *res, uint64_t *scalar) 0U, 16U, 1U, - KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, point_double(res, res);); + KRML_MAYBE_FOR4(i0, + 0U, + 4U, + 1U, + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_double(res, p_copy);); uint32_t k = 64U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r4, k, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); - point_add(res, res, tmp); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy, tmp); uint32_t k0 = 64U - 4U * i - 4U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r3, k0, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); - point_add(res, res, tmp); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy0, tmp); uint32_t k1 = 64U - 4U * i - 4U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r2, k1, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); - point_add(res, res, tmp); + uint64_t p_copy1[12U] = { 0U }; + memcpy(p_copy1, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy1, tmp); uint32_t k2 = 64U - 4U * i - 4U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r1, k2, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_basepoint_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); - point_add(res, res, tmp);); + uint64_t p_copy2[12U] = { 0U }; + memcpy(p_copy2, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy2, tmp);); KRML_MAYBE_UNUSED_VAR(q1); KRML_MAYBE_UNUSED_VAR(q2); KRML_MAYBE_UNUSED_VAR(q3); @@ -1064,15 +1357,20 @@ point_mul_double_g(uint64_t *res, uint64_t 
*scalar1, uint64_t *scalar2, uint64_t uint64_t *t1 = table2 + 12U; make_point_at_inf(t0); memcpy(t1, q2, 12U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 12U; - point_double(tmp, t11); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, t11, 12U * sizeof (uint64_t)); + point_double(tmp, p_copy0); memcpy(table2 + (2U * i + 2U) * 12U, tmp, 12U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 12U; - point_add(tmp, q2, t2); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, q2, 12U * sizeof (uint64_t)); + point_add(tmp, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 12U, tmp, 12U * sizeof (uint64_t));); uint64_t tmp0[12U] = { 0U }; uint32_t i0 = 255U; @@ -1085,25 +1383,39 @@ point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t uint32_t bits_l320 = (uint32_t)bits_c0; const uint64_t *a_bits_l0 = table2 + bits_l320 * 12U; memcpy(tmp0, (uint64_t *)a_bits_l0, 12U * sizeof (uint64_t)); - point_add(res, res, tmp0); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy, tmp0); uint64_t tmp1[12U] = { 0U }; for (uint32_t i = 0U; i < 51U; i++) { - KRML_MAYBE_FOR5(i2, 0U, 5U, 1U, point_double(res, res);); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, res, 12U * sizeof (uint64_t)); + point_double(res, p_copy0);); uint32_t k = 255U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar2, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l321 = (uint32_t)bits_l; const uint64_t *a_bits_l1 = table2 + bits_l321 * 12U; memcpy(tmp1, (uint64_t *)a_bits_l1, 12U * sizeof (uint64_t)); - point_add(res, res, tmp1); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy0, tmp1); uint32_t k0 = 255U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar1, k0, 5U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l322 = (uint32_t)bits_l0; const uint64_t *a_bits_l2 = Hacl_P256_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * 12U; memcpy(tmp1, (uint64_t *)a_bits_l2, 12U * sizeof (uint64_t)); - point_add(res, res, tmp1); + uint64_t p_copy1[12U] = { 0U }; + memcpy(p_copy1, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy1, tmp1); } } @@ -1111,8 +1423,11 @@ static inline uint64_t bn_is_lt_order_mask4(uint64_t *f) { uint64_t tmp[4U] = { 0U }; make_order(tmp); - uint64_t c = bn_sub4(tmp, f, tmp); - return 0ULL - c; + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, tmp, 4U * sizeof (uint64_t)); + uint64_t c = bn_sub4(tmp, f, y_copy); + uint64_t c0 = c; + return 0ULL - c0; } static inline uint64_t bn_is_lt_order_and_gt_zero_mask4(uint64_t *f) @@ -1126,8 +1441,11 @@ static inline void qmod_short(uint64_t *res, uint64_t *x) { uint64_t tmp[4U] = { 0U }; make_order(tmp); - uint64_t c = bn_sub4(tmp, x, tmp); - bn_cmovznz4(res, c, tmp, x); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, tmp, 4U * sizeof (uint64_t)); + uint64_t c = bn_sub4(tmp, x, y_copy); + uint64_t c0 = c; + bn_cmovznz4(res, c0, tmp, x); } static inline void qadd(uint64_t *res, uint64_t *x, uint64_t *y) @@ -1165,8 +1483,8 @@ static inline void qmont_reduction(uint64_t *res, uint64_t *x) } uint64_t r = c; uint64_t c1 = r; - uint64_t *resb = x + 4U + i0; uint64_t res_j = x[4U + i0]; + uint64_t *resb = x + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); memcpy(res, x + 4U, 4U * sizeof 
(uint64_t)); uint64_t c00 = c0; @@ -1196,8 +1514,8 @@ static inline void qmont_reduction(uint64_t *res, uint64_t *x) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x1;); } @@ -1238,9 +1556,9 @@ bool Hacl_Impl_P256_DH_ecp256dh_i(uint8_t *public_key, uint8_t *private_key) 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; point_mul_g(pk, sk); @@ -1270,9 +1588,9 @@ Hacl_Impl_P256_DH_ecp256dh_r( 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; uint64_t ss_proj[12U] = { 0U }; @@ -1296,98 +1614,348 @@ static inline void qinv(uint64_t *res, uint64_t *r) uint64_t *x_101111 = tmp + 24U; memcpy(x6, r, 4U * sizeof (uint64_t)); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_11, x6, r); qmul(x_101, x6, x_11); qmul(x_111, x6, x_101); memcpy(x6, x_101, 4U * sizeof (uint64_t)); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_1111, x_101, x6); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_10101, x6, r); memcpy(x6, x_10101, 4U * sizeof (uint64_t)); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_101111, x_101, x6); - qmul(x6, x_10101, x6); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, x6, 4U * sizeof (uint64_t)); + qmul(x6, x_10101, y_copy); uint64_t tmp1[4U] = { 0U }; - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, qsqr(x6, x6);); - qmul(x6, x6, x_11); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy);); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qmul(x6, x_copy, x_11); memcpy(tmp1, x6, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR8(i, 0U, 8U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x6); + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy0);); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy0, x6); memcpy(x6, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR16(i, 0U, 16U, 1U, qsqr(x6, x6);); - qmul(x6, x6, tmp1); + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy1);); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, x6, 4U * sizeof (uint64_t)); + qmul(x6, x_copy1, tmp1); memcpy(tmp1, x6, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < 64U; i++) { - qsqr(tmp1, tmp1); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy2); } - qmul(tmp1, tmp1, x6); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy2, x6); for (uint32_t i = 0U; i < 32U; i++) { - qsqr(tmp1, tmp1); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy3); } - qmul(tmp1, tmp1, x6); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, 
tmp1, x_111); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_10101); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR9(i, 0U, 9U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101111); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, r); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, r); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR10(i, 0U, 10U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101111); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, r); - KRML_MAYBE_FOR7(i, 0U, 7U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_10101); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy3, x6); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy4);); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy4, x_101111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy5);); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy5, x_111); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy6);); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy6, x_11); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy7);); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy7, x_1111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy8);); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy8, x_10101); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy9);); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, tmp1, 4U * sizeof 
(uint64_t)); + qmul(tmp1, x_copy9, x_101); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy10);); + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy10, x_101); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy11);); + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy11, x_101); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy12);); + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy12, x_111); + KRML_MAYBE_FOR9(i, + 0U, + 9U, + 1U, + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy13);); + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy13, x_101111); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy14[4U] = { 0U }; + memcpy(x_copy14, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy14);); + uint64_t x_copy14[4U] = { 0U }; + memcpy(x_copy14, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy14, x_1111); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy15[4U] = { 0U }; + memcpy(x_copy15, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy15);); + uint64_t x_copy15[4U] = { 0U }; + memcpy(x_copy15, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy15, r); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy16[4U] = { 0U }; + memcpy(x_copy16, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy16);); + uint64_t x_copy16[4U] = { 0U }; + memcpy(x_copy16, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy16, r); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy17[4U] = { 0U }; + memcpy(x_copy17, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy17);); + uint64_t x_copy17[4U] = { 0U }; + memcpy(x_copy17, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy17, x_1111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy18[4U] = { 0U }; + memcpy(x_copy18, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy18);); + uint64_t x_copy18[4U] = { 0U }; + memcpy(x_copy18, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy18, x_111); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy19[4U] = { 0U }; + memcpy(x_copy19, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy19);); + uint64_t x_copy19[4U] = { 0U }; + memcpy(x_copy19, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy19, x_111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy20[4U] = { 0U }; + memcpy(x_copy20, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy20);); + uint64_t x_copy20[4U] = { 0U }; + memcpy(x_copy20, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy20, x_111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy21[4U] = { 0U }; + memcpy(x_copy21, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy21);); + uint64_t x_copy21[4U] = { 0U }; + memcpy(x_copy21, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy21, x_101); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy22[4U] = { 0U }; + memcpy(x_copy22, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy22);); + uint64_t x_copy22[4U] = { 0U }; + memcpy(x_copy22, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy22, x_11); + KRML_MAYBE_FOR10(i, + 0U, + 10U, + 1U, + uint64_t x_copy23[4U] = { 0U }; + 
memcpy(x_copy23, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy23);); + uint64_t x_copy23[4U] = { 0U }; + memcpy(x_copy23, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy23, x_101111); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy24[4U] = { 0U }; + memcpy(x_copy24, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy24);); + uint64_t x_copy24[4U] = { 0U }; + memcpy(x_copy24, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy24, x_11); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy25[4U] = { 0U }; + memcpy(x_copy25, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy25);); + uint64_t x_copy25[4U] = { 0U }; + memcpy(x_copy25, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy25, x_11); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy26[4U] = { 0U }; + memcpy(x_copy26, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy26);); + uint64_t x_copy26[4U] = { 0U }; + memcpy(x_copy26, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy26, x_11); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy27[4U] = { 0U }; + memcpy(x_copy27, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy27);); + uint64_t x_copy27[4U] = { 0U }; + memcpy(x_copy27, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy27, r); + KRML_MAYBE_FOR7(i, + 0U, + 7U, + 1U, + uint64_t x_copy28[4U] = { 0U }; + memcpy(x_copy28, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy28);); + uint64_t x_copy28[4U] = { 0U }; + memcpy(x_copy28, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy28, x_10101); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy29[4U] = { 0U }; + memcpy(x_copy29, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy29);); + uint64_t x_copy29[4U] = { 0U }; + memcpy(x_copy29, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy29, x_1111); memcpy(x6, tmp1, 4U * sizeof (uint64_t)); memcpy(res, x6, 4U * sizeof (uint64_t)); } @@ -1435,7 +2003,9 @@ ecdsa_verify_msg_as_qelem( } uint64_t x[4U] = { 0U }; to_aff_point_x(x, res); - qmod_short(x, x); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x, 4U * sizeof (uint64_t)); + qmod_short(x, x_copy); bool res1 = bn_is_eq_vartime4(x, r_q); return res1; } @@ -1464,9 +2034,9 @@ ecdsa_sign_msg_as_qelem( 0U, 4U, 1U, - uint64_t *os = d_a; uint64_t uu____0 = oneq0[i]; uint64_t x = uu____0 ^ (is_b_valid0 & (d_a[i] ^ uu____0)); + uint64_t *os = d_a; os[i] = x;); uint64_t is_sk_valid = is_b_valid0; bn_from_bytes_be4(k_q, nonce); @@ -1480,22 +2050,30 @@ ecdsa_sign_msg_as_qelem( 0U, 4U, 1U, - uint64_t *os = k_q; uint64_t uu____1 = oneq[i]; uint64_t x = uu____1 ^ (is_b_valid & (k_q[i] ^ uu____1)); + uint64_t *os = k_q; os[i] = x;); uint64_t is_nonce_valid = is_b_valid; uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid; uint64_t p[12U] = { 0U }; point_mul_g(p, k_q); to_aff_point_x(r_q, p); - qmod_short(r_q, r_q); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, r_q, 4U * sizeof (uint64_t)); + qmod_short(r_q, x_copy0); uint64_t kinv[4U] = { 0U }; qinv(kinv, k_q); qmul(s_q, r_q, d_a); - from_qmont(m_q, m_q); - qadd(s_q, m_q, s_q); - qmul(s_q, kinv, s_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + from_qmont(m_q, x_copy); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, s_q, 4U * sizeof (uint64_t)); + qadd(s_q, m_q, y_copy); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, s_q, 4U * sizeof (uint64_t)); + qmul(s_q, kinv, y_copy0); bn2_to_bytes_be4(signature, r_q, s_q); uint64_t is_r_zero = bn_is_zero_mask4(r_q); uint64_t is_s_zero = bn_is_zero_mask4(s_q); @@ -1551,7 +2129,9 @@ Hacl_P256_ecdsa_sign_p256_sha2( 
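The rewrite running through qinv and the ECDSA helpers above is mechanical: every call whose destination buffer also appears as a source, such as qsqr(x6, x6) or qmod_short(x, x), now snapshots the source into a stack copy so the callee never sees aliased arguments. A minimal sketch of the pattern, assuming a stand-in routine sqr4 with the same (out, in) shape as qsqr (sqr4 is illustrative only, not a HACL* function):

#include <stdint.h>
#include <string.h>

/* Stand-in for qsqr: any routine that reads `in` while writing `out`.
   The body is a placeholder; the real qsqr performs a Montgomery squaring. */
static void sqr4(uint64_t *out, const uint64_t *in)
{
  for (uint32_t i = 0U; i < 4U; i++)
    out[i] = in[i] * in[i];
}

static void square_in_place(uint64_t x[4U])
{
  /* Pre-patch shape: sqr4(x, x), where output aliases input.
     Post-patch shape: copy the input first, then call with distinct buffers. */
  uint64_t x_copy[4U] = { 0U };
  memcpy(x_copy, x, 4U * sizeof (uint64_t));
  sqr4(x, x_copy);
}
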
KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1584,7 +2164,9 @@ Hacl_P256_ecdsa_sign_p256_sha384( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1617,7 +2199,9 @@ Hacl_P256_ecdsa_sign_p256_sha512( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1660,7 +2244,9 @@ Hacl_P256_ecdsa_sign_p256_without_hash( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1696,7 +2282,9 @@ Hacl_P256_ecdsa_verif_p256_sha2( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1727,7 +2315,9 @@ Hacl_P256_ecdsa_verif_p256_sha384( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1758,7 +2348,9 @@ Hacl_P256_ecdsa_verif_p256_sha512( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1794,7 +2386,9 @@ Hacl_P256_ecdsa_verif_without_hash( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1891,8 +2485,8 @@ bool Hacl_P256_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw) { uint64_t xa[4U] = { 0U }; uint64_t ya[4U] = { 0U }; - uint8_t *pk_xb = pk + 1U; bool b = aff_point_decompress_vartime(xa, ya, pk); + uint8_t *pk_xb = pk + 1U; if (b) { memcpy(pk_raw, pk_xb, 32U * sizeof (uint8_t)); diff --git a/src/Hacl_RSAPSS.c b/src/Hacl_RSAPSS.c index 71e141d0..8b08977d 100644 --- a/src/Hacl_RSAPSS.c +++ b/src/Hacl_RSAPSS.c @@ -252,8 +252,8 @@ pss_encode( mgf_hash(a, hLen, m1Hash, dbLen, dbMask); for (uint32_t i = 0U; i < dbLen; i++) { - uint8_t *os = db; uint8_t x = (uint32_t)db[i] ^ (uint32_t)dbMask[i]; + uint8_t *os = db; os[i] = x; } uint32_t 
msBits = emBits % 8U; @@ -310,8 +310,8 @@ pss_verify( mgf_hash(a, hLen, m1Hash, dbLen, dbMask); for (uint32_t i = 0U; i < dbLen; i++) { - uint8_t *os = dbMask; uint8_t x = (uint32_t)dbMask[i] ^ (uint32_t)maskedDB[i]; + uint8_t *os = dbMask; os[i] = x; } uint32_t msBits1 = emBits % 8U; @@ -486,9 +486,9 @@ Hacl_RSAPSS_rsapss_sign( uint64_t eq_m = mask1; for (uint32_t i = 0U; i < nLen2; i++) { - uint64_t *os = s; uint64_t x = s[i]; uint64_t x0 = eq_m & x; + uint64_t *os = s; os[i] = x0; } bool eq_b = eq_m == 0xFFFFFFFFFFFFFFFFULL; diff --git a/src/Hacl_SHA2_Vec128.c b/src/Hacl_SHA2_Vec128.c index 02af75b1..e122dd8c 100644 --- a/src/Hacl_SHA2_Vec128.c +++ b/src/Hacl_SHA2_Vec128.c @@ -35,9 +35,9 @@ static inline void sha224_init4(Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h224[i]; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_load32(hi); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } @@ -286,9 +286,9 @@ sha224_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } @@ -515,9 +515,9 @@ static inline void sha256_init4(Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h256[i]; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_load32(hi); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } @@ -766,9 +766,9 @@ sha256_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } diff --git a/src/Hacl_SHA2_Vec256.c b/src/Hacl_SHA2_Vec256.c index c34767f5..2bee1692 100644 --- a/src/Hacl_SHA2_Vec256.c +++ b/src/Hacl_SHA2_Vec256.c @@ -36,9 +36,9 @@ static inline void sha224_init8(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h224[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load32(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -371,9 +371,9 @@ sha224_update8(Hacl_Hash_SHA2_uint8_8p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -785,9 +785,9 @@ static inline void sha256_init8(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h256[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load32(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1120,9 +1120,9 @@ sha256_update8(Hacl_Hash_SHA2_uint8_8p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1534,9 +1534,9 @@ static inline void sha384_init4(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 
*os = hash; uint64_t hi = Hacl_Hash_SHA2_h384[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1769,9 +1769,9 @@ sha384_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add64(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1990,9 +1990,9 @@ static inline void sha512_init4(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint64_t hi = Hacl_Hash_SHA2_h512[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -2225,9 +2225,9 @@ sha512_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add64(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } diff --git a/src/Hacl_Salsa20.c b/src/Hacl_Salsa20.c index 151df07d..372fd3c5 100644 --- a/src/Hacl_Salsa20.c +++ b/src/Hacl_Salsa20.c @@ -85,8 +85,8 @@ static inline void salsa20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) 0U, 16U, 1U, - uint32_t *os = k; uint32_t x = k[i] + ctx[i]; + uint32_t *os = k; os[i] = x;); k[8U] = k[8U] + ctr_u32; } @@ -101,21 +101,21 @@ static inline void salsa20_key_block0(uint8_t *out, uint8_t *key, uint8_t *n) 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); ctx[0U] = 0x61707865U; uint32_t *k0 = k32; @@ -149,21 +149,21 @@ salsa20_encrypt( 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); ctx[0U] = 0x61707865U; uint32_t *k0 = k32; @@ -192,18 +192,18 @@ salsa20_encrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = uu____1 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(uu____0 + i * 4U, bl[i]);); } @@ -219,18 +219,18 @@ salsa20_encrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = plain + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(plain + i * 4U, bl[i]);); memcpy(uu____2, plain, rem * sizeof (uint8_t)); @@ -254,21 +254,21 @@ salsa20_decrypt( 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + 
uint32_t *os = n32; os[i] = x;); ctx[0U] = 0x61707865U; uint32_t *k0 = k32; @@ -297,18 +297,18 @@ salsa20_decrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = uu____1 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(uu____0 + i * 4U, bl[i]);); } @@ -324,18 +324,18 @@ salsa20_decrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = plain + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(plain + i * 4U, bl[i]);); memcpy(uu____2, plain, rem * sizeof (uint8_t)); @@ -351,21 +351,21 @@ static inline void hsalsa20(uint8_t *out, uint8_t *key, uint8_t *n) 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); uint32_t *k0 = k32; uint32_t *k1 = k32 + 4U; diff --git a/src/msvc/EverCrypt_HMAC.c b/src/msvc/EverCrypt_HMAC.c index 386cb17f..649f1e15 100644 --- a/src/msvc/EverCrypt_HMAC.c +++ b/src/msvc/EverCrypt_HMAC.c @@ -620,7 +620,7 @@ EverCrypt_HMAC_compute_blake2s( if (data_len == 0U) { uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -655,6 +655,7 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -693,6 +694,7 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -757,7 +759,13 @@ EverCrypt_HMAC_compute_blake2b( if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -792,6 +800,7 @@ EverCrypt_HMAC_compute_blake2b( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, @@ -831,6 +840,7 @@ EverCrypt_HMAC_compute_blake2b( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/msvc/EverCrypt_Hash.c b/src/msvc/EverCrypt_Hash.c index bfafa9be..153063cc 100644 --- a/src/msvc/EverCrypt_Hash.c +++ b/src/msvc/EverCrypt_Hash.c @@ -616,7 +616,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ { uint32_t *p1 = scrut.case_Blake2S_s; uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; } if (scrut.tag == Blake2S_128_s) @@ -624,7 +624,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ 
Lib_IntVector_Intrinsics_vec128 *p1 = scrut.case_Blake2S_128_s; #if HACL_CAN_COMPILE_VEC128 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; #else KRML_MAYBE_UNUSED_VAR(p1); @@ -638,6 +638,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); @@ -651,6 +652,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_Simd256_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); diff --git a/src/msvc/Hacl_Bignum32.c b/src/msvc/Hacl_Bignum32.c index 55c3f90c..f62abd35 100644 --- a/src/msvc/Hacl_Bignum32.c +++ b/src/msvc/Hacl_Bignum32.c @@ -46,9 +46,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -60,7 +69,16 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -70,12 +88,23 @@ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. 
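To make the overlap rules above concrete: `res` may be exactly the same pointer as `a` or `b`, so an in-place accumulation is legal. A short illustrative use of Hacl_Bignum32_add on 8 limbs (256 bits), capturing the carry from the return value; this is a usage sketch, not part of the patch:

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Overwrites a with (a + b) mod 2^256 and returns the carry out. */
static uint32_t add_in_place_256(uint32_t a[8U], uint32_t b[8U])
{
  /* res == a is allowed: the buffers overlap exactly, not partially. */
  return Hacl_Bignum32_add(8U, a, b, a);
}
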
+ @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -85,12 +114,23 @@ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -100,8 +140,13 @@ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -114,8 +159,10 @@ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. 
+ Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res) { @@ -149,13 +196,19 @@ bn_slow_precomp( /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -195,22 +248,30 @@ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -238,22 +299,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. 
A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -281,18 +350,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -393,15 +467,16 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. 
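As a usage illustration of the Montgomery-context lifecycle documented here (a sketch under assumptions, not part of the patch: 8-limb/256-bit operands, n odd and greater than 1, a < n, b < 2^256, and the (k, a, bBits, b, res) argument order of Hacl_Bignum32_mod_exp_consttime_precomp from Hacl_Bignum32.h):

#include <stdint.h>
#include "Hacl_Bignum32.h"

/* Computes res = a ^ b mod n for 256-bit (8-limb) values, reusing one context. */
static void modexp_256(uint32_t n[8U], uint32_t a[8U], uint32_t b[8U], uint32_t res[8U])
{
  Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k = Hacl_Bignum32_mont_ctx_init(8U, n);
  /* 256 is always a safe upper bound on the significant bits of an 8-limb b. */
  Hacl_Bignum32_mod_exp_consttime_precomp(k, a, 256U, b, res);
  Hacl_Bignum32_mont_ctx_free(k);
}
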
+ @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n) @@ -429,7 +504,7 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { @@ -444,9 +519,11 @@ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. */ void Hacl_Bignum32_mod_precomp( @@ -464,21 +541,25 @@ Hacl_Bignum32_mod_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -505,21 +586,25 @@ Hacl_Bignum32_mod_exp_vartime_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -546,14 +631,17 @@ Hacl_Bignum32_mod_exp_consttime_precomp( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -623,13 +711,13 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. 
+ @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) { @@ -664,13 +752,13 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) { @@ -707,8 +795,11 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) { @@ -727,8 +818,11 @@ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) { @@ -753,7 +847,11 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) { @@ -770,7 +868,11 @@ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. 
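A short round trip through the byte-level helpers documented above (illustrative sketch, not part of the patch): load two 32-byte big-endian values, compare them with the constant-time mask, and serialize one back. Note that the byte-oriented functions take a length in bytes (32) while the comparison takes a length in limbs (8):

#include <stdint.h>
#include <stdlib.h>
#include "Hacl_Bignum32.h"

/* Returns 1 if the big-endian value in a32 is strictly below the one in b32,
   0 if not, and -1 if allocation failed; also serializes a32's value to out32. */
static int lt_be_256(uint8_t a32[32U], uint8_t b32[32U], uint8_t out32[32U])
{
  uint32_t *a = Hacl_Bignum32_new_bn_from_bytes_be(32U, a32); /* 8 limbs */
  uint32_t *b = Hacl_Bignum32_new_bn_from_bytes_be(32U, b32);
  if (a == NULL || b == NULL) { free(a); free(b); return -1; }
  uint32_t mask = Hacl_Bignum32_lt_mask(8U, a, b); /* 2^32 - 1 if a < b, else 0 */
  Hacl_Bignum32_bn_to_bytes_be(32U, a, out32);
  free(a);
  free(b);
  return mask == 0xFFFFFFFFU ? 1 : 0;
}
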
+ + @return `2^32 - 1` if a = b, otherwise, `0`. */ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b) { diff --git a/src/msvc/Hacl_HMAC.c b/src/msvc/Hacl_HMAC.c index 63ab2032..32dab3a5 100644 --- a/src/msvc/Hacl_HMAC.c +++ b/src/msvc/Hacl_HMAC.c @@ -609,7 +609,7 @@ Hacl_HMAC_compute_blake2s_32( if (data_len == 0U) { uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -644,6 +644,7 @@ Hacl_HMAC_compute_blake2s_32( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -682,6 +683,7 @@ Hacl_HMAC_compute_blake2s_32( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -752,7 +754,13 @@ Hacl_HMAC_compute_blake2b_32( if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -787,6 +795,7 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, @@ -826,6 +835,7 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/msvc/Hacl_HMAC_Blake2b_256.c b/src/msvc/Hacl_HMAC_Blake2b_256.c index cd16e65e..5e7605bf 100644 --- a/src/msvc/Hacl_HMAC_Blake2b_256.c +++ b/src/msvc/Hacl_HMAC_Blake2b_256.c @@ -96,6 +96,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(128U, wv, s0, + false, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); @@ -138,6 +139,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, @@ -182,6 +184,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/msvc/Hacl_HMAC_Blake2s_128.c b/src/msvc/Hacl_HMAC_Blake2s_128.c index bf2033a8..f9fa97e6 100644 --- a/src/msvc/Hacl_HMAC_Blake2s_128.c +++ b/src/msvc/Hacl_HMAC_Blake2s_128.c @@ -92,7 +92,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( if (data_len == 0U) { KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -127,6 +127,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -165,6 +166,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); diff --git a/src/msvc/Hacl_Hash_Blake2b.c b/src/msvc/Hacl_Hash_Blake2b.c index d490a1a5..e13f16fd 100644 --- 
a/src/msvc/Hacl_Hash_Blake2b.c +++ b/src/msvc/Hacl_Hash_Blake2b.c @@ -29,7 +29,14 @@ #include "lib_memzero0.h" static void -update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totlen, uint8_t *d) +update_block( + uint64_t *wv, + uint64_t *hash, + bool flag, + bool last_node, + FStar_UInt128_uint128 totlen, + uint8_t *d +) { uint64_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, @@ -52,7 +59,15 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask[0U] = FStar_UInt128_uint128_to_uint64(totlen); mask[1U] = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)); mask[2U] = wv_14; @@ -647,11 +662,11 @@ static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -674,7 +689,7 @@ Hacl_Hash_Blake2b_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -683,6 +698,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -693,7 +709,7 @@ Hacl_Hash_Blake2b_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -727,7 +743,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -762,16 +778,19 @@ void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) } static Hacl_Hash_Blake2b_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -790,7 +809,8 @@ static Hacl_Hash_Blake2b_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ 
-800,7 +820,7 @@ static Hacl_Hash_Blake2b_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } @@ -820,14 +840,16 @@ The caller must satisfy the following requirements. */ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -844,7 +866,7 @@ The caller must satisfy the following requirements. Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; uint8_t salt[16U] = { 0U }; uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params @@ -855,7 +877,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, k); + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, false, k); return s; } @@ -872,28 +894,30 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) { Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2b_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -903,7 +927,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + 
init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -921,7 +945,7 @@ reset_raw( /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -934,7 +958,7 @@ Hacl_Hash_Blake2b_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -957,7 +981,7 @@ void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** @@ -1040,7 +1064,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = 1U; @@ -1065,7 +1089,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1133,7 +1157,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = 1U; @@ -1159,7 +1183,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1190,16 +1214,20 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
*/ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_state_t scrut = *state; + Hacl_Hash_Blake2b_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_state_t scrut = *s; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1217,9 +1245,14 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) uint64_t b[16U] = { 0U }; Hacl_Hash_Blake2b_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint64_t *src_b = block_state.thd.snd; - uint64_t *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint64_t *src_b = block_state.f3.snd; + uint64_t *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1233,7 +1266,7 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - K____uint64_t___uint64_t_ acc0 = tmp_block_state.thd; + K____uint64_t___uint64_t_ acc0 = tmp_block_state.f3; uint64_t *wv1 = acc0.fst; uint64_t *hash0 = acc0.snd; uint32_t nb = 0U; @@ -1244,17 +1277,35 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint64_t___uint64_t_ acc = tmp_block_state.thd; + K____uint64_t___uint64_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; Hacl_Hash_Blake2b_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1265,8 +1316,8 @@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint64_t *b = block_state.thd.snd; - uint64_t *wv = block_state.thd.fst; + uint64_t *b = 
block_state.f3.snd; + uint64_t *wv = block_state.f3.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1282,17 +1333,24 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *sta Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint64_t *src_b = block_state0.thd.snd; - uint64_t *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint64_t *src_b = block_state0.f3.snd; + uint64_t *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); Hacl_Hash_Blake2b_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1335,10 +1393,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. */ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_Blake2b_Simd256.c b/src/msvc/Hacl_Hash_Blake2b_Simd256.c index 0afd93bc..35608aea 100644 --- a/src/msvc/Hacl_Hash_Blake2b_Simd256.c +++ b/src/msvc/Hacl_Hash_Blake2b_Simd256.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, bool flag, + bool last_node, FStar_UInt128_uint128 totlen, uint8_t *d ) @@ -59,7 +60,15 @@ update_block( { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask = Lib_IntVector_Intrinsics_vec256_load64s(FStar_UInt128_uint128_to_uint64(totlen), FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)), @@ -372,11 +381,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -399,7 +408,7 @@ Hacl_Hash_Blake2b_Simd256_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -408,6 +417,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -418,7 +428,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( memcpy(b, last, rem * sizeof (uint8_t)); 
FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -452,7 +462,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_Simd256_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -593,10 +603,7 @@ Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void) } static Hacl_Hash_Blake2b_Simd256_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -610,7 +617,13 @@ static Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -632,7 +645,8 @@ static Hacl_Hash_Blake2b_Simd256_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -642,42 +656,56 @@ static Hacl_Hash_Blake2b_Simd256_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. 
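As a sketch of the new last_node argument, the following illustrative snippet (not taken from this patch) allocates a Simd256 state for the last node of a BLAKE2b tree layer. The digest_length/key_length/fanout/depth field names and their values are assumptions about the generated params struct; only the fields visible in this hunk are confirmed, and the NULL key is only valid because key_length is 0.

    #include <stddef.h>
    #include <stdint.h>
    #include "Hacl_Hash_Blake2b.h"
    #include "Hacl_Hash_Blake2b_Simd256.h"

    static Hacl_Hash_Blake2b_Simd256_state_t *sketch_tree_last_node_state(void)
    {
      uint8_t salt[16U] = { 0U };
      uint8_t personal[16U] = { 0U };
      Hacl_Hash_Blake2b_blake2_params p =
        {
          .digest_length = 64U, .key_length = 0U, .fanout = 2U, .depth = 2U,
          .leaf_length = 4096U, .node_offset = 0ULL, .node_depth = 1U,
          .inner_length = 64U, .salt = salt, .personal = personal
        };
      // last_node = true marks this state as the last node of its tree layer.
      return Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&p, true, NULL);
    }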
+ */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -685,21 +713,16 @@ Hacl_Hash_Blake2b_Simd256_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2b_Simd256_state_t - *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
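A sketch of that plain hash-function usage, reusing one Simd256 state across two messages via reset; only signatures shown in this file are used, and the messages are placeholders.

    #include <stdint.h>
    #include <string.h>
    #include "Hacl_Hash_Blake2b_Simd256.h"

    static void sketch_hash_two_messages(uint8_t out1[64U], uint8_t out2[64U])
    {
      uint8_t m1[] = "first message";
      uint8_t m2[] = "second message";
      Hacl_Hash_Blake2b_Simd256_state_t *st = Hacl_Hash_Blake2b_Simd256_malloc();
      Hacl_Hash_Blake2b_Simd256_update(st, m1, (uint32_t)strlen((const char *)m1));
      (void)Hacl_Hash_Blake2b_Simd256_digest(st, out1);
      // Valid: malloc() kept the default digest and key lengths, as reset requires.
      Hacl_Hash_Blake2b_Simd256_reset(st);
      Hacl_Hash_Blake2b_Simd256_update(st, m2, (uint32_t)strlen((const char *)m2));
      (void)Hacl_Hash_Blake2b_Simd256_digest(st, out2);
      Hacl_Hash_Blake2b_Simd256_free(st);
    }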
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) { @@ -709,28 +732,31 @@ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) { Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2b_Simd256_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -740,7 +766,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -757,9 +783,11 @@ reset_raw( } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -769,14 +797,15 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. 
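A sketch of the keyed-reuse pattern this comment describes: allocate once for a fixed key length, then reset_with_key before each message. The 32-byte key length is an arbitrary choice within the stated bound, and the static local is for brevity only (not thread-safe).

    #include <stddef.h>
    #include <stdint.h>
    #include "Hacl_Hash_Blake2b_Simd256.h"

    static void
    sketch_keyed_reuse(uint8_t key32[32U], uint8_t *msg, uint32_t msg_len, uint8_t tag[64U])
    {
      static Hacl_Hash_Blake2b_Simd256_state_t *st = NULL;
      if (st == NULL)
        st = Hacl_Hash_Blake2b_Simd256_malloc_with_key0(key32, 32U);  // key length fixed here
      else
        Hacl_Hash_Blake2b_Simd256_reset_with_key(st, key32);          // same key length, new key
      Hacl_Hash_Blake2b_Simd256_update(st, msg, msg_len);
      (void)Hacl_Hash_Blake2b_Simd256_digest(st, tag);
    }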
*/ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) { @@ -791,11 +820,16 @@ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) { @@ -803,7 +837,7 @@ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -873,8 +907,7 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; @@ -899,7 +932,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -967,8 +1000,7 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; @@ -994,7 +1026,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1020,16 +1052,25 @@ Hacl_Hash_Blake2b_Simd256_update( } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. 
Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_Simd256_state_t scrut = *s; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1047,9 +1088,14 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1064,7 +1110,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc0 = tmp_block_state.thd; + acc0 = tmp_block_state.f3; Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; uint32_t nb = 0U; @@ -1076,17 +1122,35 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 nb); uint64_t prev_len_last = total_len - (uint64_t)r; K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = tmp_block_state.thd; + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; Hacl_Hash_Blake2b_Simd256_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = (*s).block_state; + bool 
last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1097,8 +1161,8 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec256 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec256 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1106,7 +1170,7 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) @@ -1115,9 +1179,10 @@ Hacl_Hash_Blake2b_Simd256_state_t Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -1131,9 +1196,15 @@ Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1175,8 +1246,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); } +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
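An illustrative one-shot call for the function documented above. The trailing (params, key) argument order is an assumption, since the full prototype is not visible in this hunk; the personalization value is a placeholder, and NULL is only passed for the key because key_length is 0.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include "Hacl_Hash_Blake2b.h"
    #include "Hacl_Hash_Blake2b_Simd256.h"

    static void sketch_one_shot_with_params(uint8_t out[32U], uint8_t *msg, uint32_t msg_len)
    {
      uint8_t salt[16U] = { 0U };
      uint8_t personal[16U] = { 0U };
      memcpy(personal, "demo-v1", 7U);   // zero-padded 16-byte personalization
      Hacl_Hash_Blake2b_blake2_params params =
        {
          .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U,
          .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U,
          .inner_length = 0U, .salt = salt, .personal = personal
        };
      // Assumed trailing arguments: (..., params, key).
      Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params(out, msg, msg_len, params, NULL);
    }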
+*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_Blake2s.c b/src/msvc/Hacl_Hash_Blake2s.c index 6e19d83d..167f38fb 100644 --- a/src/msvc/Hacl_Hash_Blake2s.c +++ b/src/msvc/Hacl_Hash_Blake2s.c @@ -30,7 +30,14 @@ #include "lib_memzero0.h" static inline void -update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t *d) +update_block( + uint32_t *wv, + uint32_t *hash, + bool flag, + bool last_node, + uint64_t totlen, + uint8_t *d +) { uint32_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, @@ -53,7 +60,15 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask[0U] = (uint32_t)totlen; mask[1U] = (uint32_t)(totlen >> 32U); mask[2U] = wv_14; @@ -642,11 +657,11 @@ static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -666,7 +681,7 @@ Hacl_Hash_Blake2s_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -675,6 +690,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -684,7 +700,7 @@ Hacl_Hash_Blake2s_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -712,7 +728,7 @@ update_blocks(uint32_t len, uint32_t *wv, uint32_t *hash, uint64_t prev, uint8_t rem = rem0; } Hacl_Hash_Blake2s_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -747,16 +763,19 @@ void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash) } static Hacl_Hash_Blake2s_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -775,7 +794,8 @@ static Hacl_Hash_Blake2s_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { 
.key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -785,38 +805,55 @@ static Hacl_Hash_Blake2s_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. 
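A keyed BLAKE2s sketch using that allocation path; the 32-byte key and the default 32-byte digest are the maxima for S quoted above, and only signatures visible in this file are used.

    #include <stdint.h>
    #include "Hacl_Hash_Blake2s.h"

    static void sketch_blake2s_keyed(uint8_t key[32U], uint8_t *msg, uint32_t msg_len, uint8_t mac[32U])
    {
      // key length 32 == the bound for S; digest_length stays at its default (32).
      Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc_with_key(key, 32U);
      Hacl_Hash_Blake2s_update(st, msg, msg_len);
      (void)Hacl_Hash_Blake2s_digest(st, mac);
      Hacl_Hash_Blake2s_free(st);
    }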
+ */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -824,20 +861,15 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; - Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) { @@ -847,28 +879,30 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) { Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2s_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -878,7 +912,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -895,9 +929,11 @@ reset_raw( } /** - Re-initialization function. 
The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -907,14 +943,15 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) { @@ -929,11 +966,16 @@ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
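A sketch of the reset-based reuse this comment permits: one unkeyed, default-length state hashing several buffers in a row. The buffer arrays are placeholders.

    #include <stdint.h>
    #include "Hacl_Hash_Blake2s.h"

    // Valid only because malloc() keeps the default key length (0) and digest length (32).
    static void sketch_hash_many(uint8_t **msgs, uint32_t *lens, uint32_t n, uint8_t (*outs)[32U])
    {
      Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc();
      for (uint32_t i = 0U; i < n; i++)
      {
        Hacl_Hash_Blake2s_reset(st);
        Hacl_Hash_Blake2s_update(st, msgs[i], lens[i]);
        (void)Hacl_Hash_Blake2s_digest(st, outs[i]);
      }
      Hacl_Hash_Blake2s_free(st);
    }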
*/ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) { @@ -941,7 +983,7 @@ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len) @@ -1007,7 +1049,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = 1U; @@ -1027,7 +1069,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -1090,7 +1132,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = 1U; @@ -1111,7 +1153,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -1132,15 +1174,25 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
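A sketch combining the update return code ("0 = success, 1 = max length exceeded" above) with the digest return value. The bare comparison against 0 follows the convention stated above; a production caller might prefer the named error-code constants from the streaming-types header.

    #include <stdint.h>
    #include "Hacl_Hash_Blake2s.h"

    static int sketch_blake2s_hash_checked(uint8_t *msg, uint32_t msg_len, uint8_t out[32U])
    {
      Hacl_Hash_Blake2s_state_t *st = Hacl_Hash_Blake2s_malloc();
      Hacl_Streaming_Types_error_code rc = Hacl_Hash_Blake2s_update(st, msg, msg_len);
      if (rc != 0)                        // 0 = success, 1 = max length exceeded
      {
        Hacl_Hash_Blake2s_free(st);
        return -1;
      }
      uint8_t written = Hacl_Hash_Blake2s_digest(st, out);   // == digest_length (32 here)
      Hacl_Hash_Blake2s_free(st);
      return (int)written;
    }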
*/ -void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_state_t scrut = *state; + Hacl_Hash_Blake2s_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_state_t scrut = *s; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1158,9 +1210,14 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) uint32_t b[16U] = { 0U }; Hacl_Hash_Blake2s_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint32_t *src_b = block_state.thd.snd; - uint32_t *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint32_t *src_b = block_state.f3.snd; + uint32_t *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1174,18 +1231,35 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - K____uint32_t___uint32_t_ acc0 = tmp_block_state.thd; + K____uint32_t___uint32_t_ acc0 = tmp_block_state.f3; uint32_t *wv1 = acc0.fst; uint32_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint32_t___uint32_t_ acc = tmp_block_state.thd; + K____uint32_t___uint32_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; - Hacl_Hash_Blake2s_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Hacl_Hash_Blake2s_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1196,8 +1270,8 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint32_t *b = block_state.thd.snd; - uint32_t *wv = block_state.thd.fst; + uint32_t *b = 
block_state.f3.snd; + uint32_t *wv = block_state.f3.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1205,7 +1279,7 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) { @@ -1213,17 +1287,24 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *sta Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint32_t *src_b = block_state0.thd.snd; - uint32_t *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint32_t *src_b = block_state0.f3.snd; + uint32_t *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); Hacl_Hash_Blake2s_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1262,8 +1343,14 @@ Hacl_Hash_Blake2s_hash_with_key( Lib_Memzero0_memzero(b, 16U, uint32_t, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
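A keyed, personalized BLAKE2s one-shot as a sketch. The trailing (params, key) order mirrors the assumption made for the B variant above and is not confirmed by this hunk; the params struct is the shared Hacl_Hash_Blake2b_blake2_params type used throughout the S code in this file, and the non-visible field names are assumptions.

    #include <stdint.h>
    #include <string.h>
    #include "Hacl_Hash_Blake2b.h"   // shared blake2_params type, as used by the S code above
    #include "Hacl_Hash_Blake2s.h"

    static void
    sketch_blake2s_keyed_personalized(uint8_t out[32U], uint8_t *msg, uint32_t msg_len, uint8_t key[16U])
    {
      uint8_t salt[8U] = { 0U };
      uint8_t personal[8U] = { 0U };
      memcpy(personal, "demo", 4U);      // zero-padded 8-byte personalization for S
      Hacl_Hash_Blake2b_blake2_params params =
        {
          .digest_length = 32U, .key_length = 16U, .fanout = 1U, .depth = 1U,
          .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U,
          .inner_length = 0U, .salt = salt, .personal = personal
        };
      // Assumed trailing arguments: (..., params, key); key has params.key_length bytes.
      Hacl_Hash_Blake2s_hash_with_key_and_params(out, msg, msg_len, params, key);
    }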
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_Blake2s_Simd128.c b/src/msvc/Hacl_Hash_Blake2s_Simd128.c index c02da8fa..a85b18a4 100644 --- a/src/msvc/Hacl_Hash_Blake2s_Simd128.c +++ b/src/msvc/Hacl_Hash_Blake2s_Simd128.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, bool flag, + bool last_node, uint64_t totlen, uint8_t *d ) @@ -59,7 +60,15 @@ update_block( { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask = Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)totlen, (uint32_t)(totlen >> 32U), @@ -366,11 +375,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -390,7 +399,7 @@ Hacl_Hash_Blake2s_Simd128_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -399,6 +408,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -408,7 +418,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -442,7 +452,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2s_Simd128_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -583,10 +593,7 @@ Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) } static Hacl_Hash_Blake2s_Simd128_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -600,7 +607,13 @@ static Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -622,7 +635,8 @@ static Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if 
(!(kk2 == 0U)) @@ -632,42 +646,56 @@ static Hacl_Hash_Blake2s_Simd128_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. 
+ */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -675,21 +703,16 @@ Hacl_Hash_Blake2s_Simd128_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2s_Simd128_state_t - *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) { @@ -699,28 +722,31 @@ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) { Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2s_Simd128_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -730,7 +756,7 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + init_with_params(block_state.f3.snd, pv); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -747,9 +773,11 @@ reset_raw( } /** - Re-initialization function. 
The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -759,14 +787,15 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( ) { index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k) { @@ -781,11 +810,16 @@ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
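As a sketch of the intended lifecycle for the keyless case (the message buffers `m1`/`m2` and their lengths are assumed caller inputs), a state allocated with `malloc` can be reused across messages via `reset`:

    Hacl_Hash_Blake2s_Simd128_state_t *st = Hacl_Hash_Blake2s_Simd128_malloc();
    uint8_t out1[32U];  // default Blake2s digest length
    uint8_t out2[32U];
    Hacl_Hash_Blake2s_Simd128_update(st, m1, m1_len);
    Hacl_Hash_Blake2s_Simd128_digest(st, out1);
    Hacl_Hash_Blake2s_Simd128_reset(st);  // key_length and digest_length keep their malloc-time defaults
    Hacl_Hash_Blake2s_Simd128_update(st, m2, m2_len);
    Hacl_Hash_Blake2s_Simd128_digest(st, out2);
    Hacl_Hash_Blake2s_Simd128_free(st);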
*/ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) { @@ -793,7 +827,7 @@ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -863,8 +897,7 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; @@ -884,7 +917,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -947,8 +980,7 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; @@ -969,7 +1001,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -990,16 +1022,25 @@ Hacl_Hash_Blake2s_Simd128_update( } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
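A sketch of that pattern, assuming `s` is a previously allocated state that has already absorbed the message (the buffer name and the HACL_BLAKE2S_128_OUT_BYTES bound follow the paragraph above):

    uint8_t out[HACL_BLAKE2S_128_OUT_BYTES];
    uint8_t written = Hacl_Hash_Blake2s_Simd128_digest(s, out);
    // only the first `written` bytes of `out` are meaningful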
*/ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_Simd128_state_t scrut = *s; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1017,9 +1058,14 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Hash_Blake2s_Simd128_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1034,19 +1080,36 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc0 = tmp_block_state.thd; + acc0 = tmp_block_state.f3; Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = tmp_block_state.thd; + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; - Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + 
uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1057,8 +1120,8 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec128 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec128 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1066,7 +1129,7 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state) @@ -1075,9 +1138,10 @@ Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -1091,9 +1155,15 @@ Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1135,8 +1205,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_SHA3.c b/src/msvc/Hacl_Hash_SHA3.c index 89bb0491..9cf5abb3 100644 --- a/src/msvc/Hacl_Hash_SHA3.c +++ b/src/msvc/Hacl_Hash_SHA3.c @@ -2166,7 +2166,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s) Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. 
- It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -2191,14 +2191,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/src/msvc/Hacl_Hash_SHA3_Simd256.c b/src/msvc/Hacl_Hash_SHA3_Simd256.c index 131c34e6..e0bb7e0b 100644 --- a/src/msvc/Hacl_Hash_SHA3_Simd256.c +++ b/src/msvc/Hacl_Hash_SHA3_Simd256.c @@ -5992,12 +5992,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -6038,15 +6038,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. 
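As an illustrative sketch of the absorb/squeeze pattern described here (the companion `Hacl_Hash_SHA3_state_malloc`, `Hacl_Hash_SHA3_shake128_absorb_final`, and `Hacl_Hash_SHA3_shake128_squeeze_nblocks` entry points, the 300-byte input size, and the `input`/`output` buffers are assumptions for the example), the single-buffer SHAKE128 API is driven as follows; the Simd256 functions follow the same pattern with four input/output buffers and one shared length:

    uint64_t *st = Hacl_Hash_SHA3_state_malloc();
    // For a 300-byte input: absorb_nblocks consumes the first full 168-byte block,
    // absorb_final consumes the remaining 132-byte partial block; both receive the
    // whole buffer and the full length, as documented above.
    Hacl_Hash_SHA3_shake128_absorb_nblocks(st, input, 300U);
    Hacl_Hash_SHA3_shake128_absorb_final(st, input, 300U);
    Hacl_Hash_SHA3_shake128_squeeze_nblocks(st, output, 168U);  // squeeze one 168-byte block
    Hacl_Hash_SHA3_state_free(st);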
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -6378,7 +6378,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/src/wasm/EverCrypt_Hash.wasm b/src/wasm/EverCrypt_Hash.wasm index 1447feb38afaaca8ed624e0dc44b1238d670b653..70c58ff8fbe9165f06e52dc9e9869015bdcc23ce 100644 GIT binary patch delta 84 zcmaEIl=;%=&j&HjvC ie2o7#-{LRjW4y9CK+c4N$&q1mi<&kgh+eO5x*GuJb{vWT delta 59 zcmV-B0L1_5#RKHU0}23Pb8P_t0005AkqYvWuoRI+8Iv#p1+y>#s}2GBv)m7L4gt`! RQXVu2vxp`u0kgFyHM??26z%{3 diff --git a/src/wasm/Hacl_Bignum.wasm b/src/wasm/Hacl_Bignum.wasm index 6e090b50b5acc4b01d3eaa14e9a14507c7cc0467..9858cc28d9d2b50006974993d3546bb2ac6e8ea0 100644 GIT binary patch delta 2595 zcmeHI`%~0a6yNjRFR%jyh_DEX0?~{FDn7#3P`e{G)=ZQ$xxn|kU{n49{_?r!p8GlH zeD3|8d%s6o#3L=Dd5&7WYnMekyHeK)Y!F<<#fwzHVud;4!Wfmn9txqL#%ZAYhO~E5 zGm>MLs6;kP2mu$CB1C{I%NJ&Y8*39HL9<_smR?qyH6Tje9#sMk7Oy=t_)i<4LGZgt z!)>j*h{K;j_xLPWRO9S!$RQdE!gFZwp6l<1!!rx)E;#fo;aN5=O{X=-3KPY>66;B` z^*R#`U8|aiS@_aDN^OZx6+pxKNBlu#-fPp*wPWpz$YV9t;Vf_SYH8jZKdC`u)dc`i zyb_3FN~m(hi6wrkRG8v9HNm`1OUDyPd7IlsB{+Q>1$S@j9}5m=ebwG<=-p@0IrBZ- z+k_5-M7)cVL15F34BdU-&%ko^#!YDN+d+1gDUlu89w)Cw+Y{Jp+4=H(GPgwbN86Pg z<#q%6CfiSn+?frSvZgu-s|Ni5G?{grA%1)Ob;O~Zrx7D+o@>Xs9PMI7t6$Gz?%dal*Ik7bhUBdAQ?Nh%ZEK*3456r7VwVQmeBx46Bm zk?f2{Dl;vgnAuIlEN&uyeLkT_Goha22hCJQx0lMa{m9Go7f_k}Qxx2LDjad}(-nv> zoF?4Pv1A|FL#LY&OU;RhBW7YQYbF0%7QzZ{U+3t({|0b8jN^Eu%&pTgxoFEZdWLS?tY<1@c;E zSpxgJ!d=c2dh{G-v&8@Jr4ZB@^KTa-i(R!*{;@TuMwiO%UUY6x8XOPdNrOMy;BT`v zJ(Y52PVQQIIwv}NYR4K_On(%vkNrL93GVFA#LKek(sG!g360XL=BiX75qe0|i)nna z)kkCpE+0is-0$=5nAJvvNxjQ6r+x+4Hp>DX&##*?McE00t0ki`5W| zLakXMXhF2*if|0QD_tNGcVSC)g;!~H?7wbpcf%NYrkf6Q`uw}w92z+3#QfUbnK|DB zOfs{{13Zb@;R*OOTJdAxCdVt-HarFXxXp=afe&bLqmNf#0BfBe_@V4`esIM}j6>@z zW?g`jn1O&vVtfOg#pDITc_%T(U}$g>;}ZsE)L$9~c<@$77*ttXLSd1j_7TD_{{Xpm BQD*=E delta 2622 zcmeHI`%~0a6yNjRg=K&Mk%wRu5Y0%S;v;-TvLiOuOq4U_3?Ft%vJtr{figS0>bal6IXU(1l=Q^Ntp`joohX(fr-fA44UTk&1p?5jYvSmdYt=U!^ zDDLGbSDI~(Hqg+$u9=vHFOE@aQ*4TWq6Wsj0hs&7G<5CW_yY1+ZA}Qv+qPa@`1%h@ zP@AKDfhcZqL@~rwJL1F?yF)BYa-E%E;no#X3B>QX34q^Q@?rh|p zXV5wSUEJHW4y{PMOQKd_b9D^e{ol^Rat*qzXdl>3cBUbo9oZEltwmVl*sJ<{X+D!% zBKae%a*hhCmVKr762r3efGKNh60oZOcW489`~*Y%`owF9BRNkYhSoldn8L z8|o}**VGZy?IbY+_R;G0lY~Va>$%6d{t|kw*T08l5*tFPOx`If6VOP(3Aq%UlS^S| z8wqc7du0>Z=}lB-PCha9&BQEjCVxXdp-T&)hU5D!R3>^ql{x!8FEda;W%5r`aNp?= z#Nm%uBd$F|xQk=S0kTKVv>=um;}OS=#9Y}%{x?m8mE69;(f!~Z^!T>#!7_*2*VAr# z&lMwPb>KO&UppumY-+s(GEhimN((7?qL6}DbrUt|Q^G24-{u(l3zg|FBIfhO#Qfj_af*uxN4On+k?idk zsZ2r-mFevn0=a3=eWpX-hR?elv5A)SQ}d-&P!n}lkZtUXXP)KMh?8p>dcv4OHb!?XV30fEsGoo$Mwm-2R*@E0~vT()?Ho&^VC7%8pQ&IA}~|~ zsTx%(pX~4ecIe7662vnrbD&2Q0X*!3vY0!;s64x&3hp#=13?Gur?p~I;`+t{}c;1th zd~xr;m9*cNmBjqfDJu#8>l^uhdVl;QY}vgNix7`b^iIoVc7F2~fD!q%$|&D#ZV~|B zNy{=NMA18zsRGn(&Q-y2^saG$Fx-VD#Svbi)ye<5xm}Gh(kyx|(CPE+RT diff --git 
a/src/wasm/Hacl_Bignum32.wasm b/src/wasm/Hacl_Bignum32.wasm index c2102b8189b2a5ab8d663a53be33e8f1e9a552e4..cfbbf5e97f243a758c4f16e6e02022a1f45f1ed6 100644 GIT binary patch delta 681 zcmaEs{w#fhloE4&eLZ7+0wb8HW2%QS5rPvBicQvMWJpd{VB(Zx=MGOS&J8XtD9A4=5kO+cmlP!yY~IK8 zo0XA$^Dd6%OpNT4cX4EGKEvI?B*iM_$e_W%;K-uDkmJahO5VM-Mk6d1D5OtKaD1Xr5J7d`obL>|y70zwLmK)r^-%i)smg}Z?w z)gpR8=~W`mAZY{f^^@m{O#%uSh}$v3RU#Y1!^ok)ps1msApmnvrBp6Z>vO41Ky9<6 z)j;MPltEZiFM9|m;Uve&%)=_BzyNWr0Q2N<#b9A%0Yz>FZbt^Q$%6ValW!_c1)7qt zbQ(xnD+3)kIZJsfkW^5y0LDzD%4|lC&0kd(F^WS&RDlyM5Z0;P16tav{+vmSRT1c5 zN5*VNmOMvBXkY=YKd;j(!Kx?okU)vK9&*inf!!Nfsu7GE0ZQ8+vJ5za*XVg z^_jFc?`QhO%E+>L7sqlYMwZFDII=dM;qG9P;*fG=&|qM2WKm$qab(PKWXS>ZvjkWs z=kZO(CoRq2$H+0+Ku{ED+yVY%m{J7>1%@m%lWYY(!IkFmMNfVpkq2~&fRF+sP_Loz za=7Gs;clQvwTK>2dXd%?PI23^nc4W+UWXW@6ga#JS`tv%y5*&)6;0RC<25AQ(Q0xF>W3#??7b7_uC+lnH sZ+@)H3<^3&17Kur{;LNJSSV-mU%i^imkrhcg&PblfZ?=xr;#or0J_YeQ~&?~ diff --git a/src/wasm/Hacl_Bignum_Base.wasm b/src/wasm/Hacl_Bignum_Base.wasm index e407cd7801c73d69dfcdfc15c6db86c600fcf01f..9e75139e78655daed16185b0b7c989c0559e0df4 100644 GIT binary patch delta 1271 zcmZXSTS(J!6vy{-n=Z_pbDKJK?xuOS2rqb_mr|!wx7pNOVrJl)X6;S z5wBY7Egtm#JiwSx%M2_)m|=P*_Rbm%l3-_BUzdHLr>DEOU-3`f+STUqbhnG7LLf_G zfx;1{WkEtOOYtqT3C1Kot2kTAB!X4k>H8{sCgB}l2;&cgM~4K?%1{UAOsJ7_B{Y+B zpO`L3eU^AjzQAi+7?O9xklYBvQ>Fm3iCx4yiXGk=4Zp-S`{7B@7*x%hsny31UXsV~RB88*yvMjKt6Lq^gV z^LV{YRGU!Sh!ds_UN^@h`6M36op?NrN&p*(UgBngpLbRcLsOdA4UI1e8hbK^A0%V= zZ8F9bDWH`&PFziaW;zwgy;LMsX~@(QuM^ja(sXEkq(d{Dag%F&%A>sj{A=u(J3u4T zm_>;qk&b`dX67+VJoMWw=WEOQ%v1YWJ!1aVnM>DmJ`eBvcOI7I$j7n<^RcXX;s>Ip z0G+x5tW{Qsq^l6g(L(eWh+l}tB6OOHb~v+(vDW!wTI)MAG@cR+FPC8WV+o?srC>dA zjQF$^nt?JTUzH)bTZW9O9Bd{|5m(EhnL7?m^a;|GS3t8>f#LK@yhVE@u3>_>PLx!k zQ&jbWQ)eBT#vOLv?1xXAvtM7e}fogM_o!BvHN-5?||mJgRZCJBs5YdhTEMOzUsulb8xy5 zWj=AR5tgzhmt0~N>81KWom3Z?B@*fPgo_pxfAq2mVR189L>wkAG(&pt6qgEVEvx?> ZkL>U9_(EJ(Ty)E6tVh|3i5so&{{Y>NQ7-@h delta 1294 zcmZXSSxA&&6o%)U#WBnr_uOY3HFuG4A(wGnN*&jkF_$#6GH}u~mr#r*Z!)CR@h^x1 zy-4ULq?pQX!lg1zyy&7xLApu0DU+B$f~fPJ3khQ2`Okm8_x)eyJI6IIT=QZ)B23tY z{gk*Ow-dSCgTiiiWM7uFVxJIFywD1{V=7W8B$J5KDm`jjdO9q9U0t2sy_$dO=HBj> zE}6sm7nsw`4W=dtG220iX$|I>u@J=QLu-fw zp;5%i&?Cfkre7H9O6Ew|GS!ywM&hGzDBp+UR1*Q3nAeyK5kIK&M0=HD(sV`mH-nK= z#1MTGaX`;8`Yft9m;q6!&CJoLRjM1Jp?n?<ont{aNNWrrv-~yk%zI)0>tzcLix53%I!jEqKd!<<~Vby2r<(~5fgHZV~UCqvtA5yatUtH zQi5$5Wv(!lrRW$+UlUa(Y|m*El*1WeJpx{loWG)eLZ7+0wb8HW2%QS>lrtih%!#jVH9O#n!J=zfsuLgF-A>BmdX4~ z+KjA|Z5fpq*?^?v#?Fv@MVV(McSXO(hf&|qLtU~pv2apcGWbFu{3C*Nm1 z$;iHWC7U}V533YRR)Bf(ANF8WfoP5$K=VX6uK>xHoO(b~f(s;ak;?+;1}^T|j2xSn zaho$UvQCmwWMrSje;nu>0eK0a=@0q!7=e_4pac_K%t=^<5ymjRKLp#04WeJpx{6fa{vV|@a1eLaX`oG2@~(Seh3vKpf>i=i0<(_~vlMMmbyKN*!6StiFb zDloE6ZeY}8WScC*B*(};c@?AfW-F#XW^oQFM+OZB1_cI3#vDhE955$KfMxQ1){~4Z zn^&^AGxBgq!DIzkC;wp&Miq$W*a5Ubg!2lJe95T?Bqg{&A{V(VfOc|m&t_!byo}qN znUQ0Xj3Oh;B>v-!9Fqm)C4iaQUZb!OmHzLVHHLo$4>#MU~+><0+9VsL=>p% zy9g%>oFgvz0;mOxfl`cP1BS|GA&&JVcIlr7S^iO;y%E&ZXo>763d9nkeCL_z_PDX7; y)`^AkEQV$bY!eG5xe{|S)AQoZOc>ZFXEDlc-onVk$jCbRAd@sB`{uh$lbHbhnH&rN delta 86 zcmeys^?_@G6fa{vV|@a1eLaX`oG2?f(V=-_tuTwB83WVA?TU=d6AL9L_b_rYvP`_G pz{ooBmnI|I$D7_~QVVdPe( BBc%WU diff --git a/src/wasm/Hacl_Hash_Blake2b.wasm b/src/wasm/Hacl_Hash_Blake2b.wasm index 29138d3dc1dc597d2e8982aa1a7293c07bf36257..57b34c6b6b7777a4b39813bdb1186cb1dd52831c 100644 GIT binary patch literal 23807 zcmdUX2Y6h^b?%+Tf>>Zt7QGSVIRHrj6iKjxB&fm#i%5|YEmNdT+Y$tlAcaLC1%k2) zNI+#JS(fA$+j8V4r#OiXCr)wV#7UHw^5VpCsuSlWah}uT6en?cQC=M9KQnXZ&hB1- zjKB9i$M`X12*IkDMNxy)Zd8xIGB`E}d~*MlOz?nHW18D8FX`veC)$ z^V1_hmhihY_{gS4CnqnQ3CjEq1vm^t+qMQtze~o)F*`GQF(~(&V=!zP3|9DEx_k_0 
z?i)KZF>?CyxpQMPBbUY>9t$e`zKwogOyI*~GZ%tX$Dz?PlOu4aNPmWHEZ5`Qn zVfxbS==AKRZryZf^wNE{>Ga6R$oa8Z;66V!de`Xb$+4hvndf<_=f@w=504B7RpE+O zogJMW9Rb~$*|Pbd{NQnvn)G$##Np}LEn9buT;8@7tAN(B7ZxB4t?BBT+PX}ALt|5O zOKV$uN9W3}RjY|te2=uQm=CfGDvj&QL3GVXQD4wj@v%HQwzQOTIX*o;8&t0tggU7nmB59+#>>@YccX*S4o9m?)II&vs`{Lsko(d>}}TZeH!w(rQu@Tt2F94||B zmbM=|ahGi^i)lS_;FN7mt}K1_c=l-ca5iBv7wZEBgP|d9G|@p zY5`5VG%`AUc4T7gp`a;dG5zk+r@3t@ltIf4rA;$qmmo`;7@pRY4AG#CuGKqeZ(X8G z&=JFhS+}4wrs?eX`7tck%2+WtcOmGCX*f4CHWsXkX*hG?;zPme)upW6CrT6!HK6s^ z=;2QWR~prTK9^NgedVK%LiGRUZ{ebC-9%dztqd! zFE8DeysGBSf9n4s4xuOi*`O~7xGJ0BG^H~Y9U9JNvT8D$n$$oplgI{~;z~+Up+lxYQB)M1jrfNW`$sASp2VSQF4ArV!rPEZ+HK?w|^R?N`;qdty zuA_3)fI4_klR3^%2DrGM>bZd$xRL5~bB-E?iQ25nacvC#Y?7O($qgyF+>n~7*$k;k zhSUU>n!F)31Ck*%+aZ}wF(j#(TPTsdIy_%K9HhCG%3QgPS}2)?iPK{;h+qJF)Zyej z?%CjAa-NdqX8Kh&vGnw}Q+pvMJihpJK~<_pp>ksJ$vK&!!tN>EeP@sY{~grMoz$7W z9Wwl>{m<5f<~j zj@BB8*2!qrh)Au8A%Uu1VC~ITJFLCgs*6}u?!z$pso#m)vKq9W)*Er_m%;P{WxqH2 z^?*d&*4q)APBHAzdcJ{f;0<&`)=GDdHv&k7)36DEo0Ea;m%s6euU*Mh@@Cphec2$F zO-|-S`?t83t+W*_%C&5BE!$~3S`w~hhie(6L9~>)mYuF;7wtkz(zWb%EkiVfmU7p! z$FQYq`m_+)Ov4rNXt`;#&67UbLiKOV+gv(=b{pUCTb#vY+;&rOLG&a4iSv zAX?I{+TsP3GpnSO9Day5hUM(=mz zQ|Nyr1YQT=SO~nH1_18!a1GEY!8Jgy6I=uIdck!Azrh#%yM58W#~1w(D9^oF#iQ8- zpU#%?84BoLNPnm&X916T@EH1^gI1pN%*ROK?tdeewyuOqWa> zT^8gsf_x?pc|fPVAw5tu#5dxR2SNBjkMINh5IukzJES+d;v7Fr4;hFa7OD>l)d%CL z`a8gQfUZ`FXs#oHuKEewM^^YpxepGqL z7Zg9HXhBJ>ozKS=J+88Cm{HvNWJ;eL=eybw%My+Myc@N0*+Z3UL zX{EPmE&zZFTw}NZzPC%mJA~3Zd`fRu{9TIPuB6sb;&&=~r^;rqxVUYW#eJ6|bTU2Y zU78vIP{TEbTA{qYTgL29!VB`ydxVqs_?*03@pmgiZ`9f`zu*CL{GuXsFs<~W<^lk? zz%`bOQBQZ@D_r<$^gem${ldljeJo){P!yQUX{HCGmo2+ng2dT=wy1(_i1VXKn>RzYC#l$ zeLn`AbrINy1#%(S4=DP6#XqR%2UUNN;vX`F41dHF+W4cU5b&ZD=!Z066h4ANN@_le zLPiRUD74Xs6@N_8$AB%vKWqwZ{3E6i@W)Ld#Xl+q`eBU?g&)z_Q24mUhQg0(Y(Jp* z#}xe-u(k1zn?k@pVG1e!geheBC#66?uCbx;6B-)|pU~J)_({QrY5FP57=IEkGyKyE zUiW826i^rW-A_pkIz%-;E7SXPvSvT$ui2**|GXk}N3E5PpY?z_epwMZm{xjOvjza` zg=^`0q3Rce#xE4n_#c`EYAua>q=zZb@h>VuyQ!dG)Fc3a1g@ndFtDHT2lh)Pga2iD z>{o=1U-8-aCB^?!5xS$+vhm9vkmp}jgbt>aepT}U0DRyY!w2wvS{i;$DE*pG>C=jT zUD2nN)EY|s8AYE_+GFb*6yY}%p_A!BzoDrC05x1=s0D+uD7>`#T4KlOR}6UF~q5xS$+ zj`|B8Fvnk1gbt>azNq;C06uUn0_o4@Z+YC;wMZY!+9ytV~L#sqV@zmjzUTbPoR*ILLP-S`ljM16Z9mo zW%wymXyd0%A>cQeLW-Y}0zIX%q42cEhQgaPHWZ%G*cKA}<^;VN*i!tgDP;ILQ)uJo zO##8N6zExv4Ta}4HWZ%M*id+j#?~v-)f+ck2&EH*B?(`$-nOFC_>aOa;B9X#fBXT;pg^RY7(7 zBhM@>1kH)7jYuRQN4(^JxlV>O+GIlown|f!D%>i?UcY1_OtPVzS0D$lVlW7hb*Rh+ zLqR|glL<~?h}dR!$p+*kAYM7D+!%PBz+{BMZVp$*=Qx%kiyVg`yJU_-$W~jjrc)eQ zo#Vi`vQUJgE&Yf^u0p*_0$q+~ivAil1DhRLnj!cB>~igUQW!lUol6l0x6i~qAszit6aruTHPNIldE9j z{-6&T0<+s)#XZ#1A8f{ceps=V*7gUxu<73{d58eF4zatyYs6-Iliu4GlPH-j?B2@+ zY@iLikv8%s3b+0@32p5A=NO5B5@ANw0M=?C>l0idNU>bJxqx6Bx<#q9Ewn`lVw<10 z(Kgl0jn+3>sM_IGK*f1jX}L8i$^omAkx}K4Kjuua!c``+%mjVZf(3vZp6Fg*2hC~ z+su39wu^71HrkQro2Y{Z^L#VXSv&K53$3DEdEQGzyYrl-ZW_w-Fs-3IdEQ6s=*B$n zr$)L-7;mJTh1o{BMR;kXy~0ZqWu>i&hNZ2E_DNe4?MGWSpo)UYOn>1iV=vRo*h;aZ zh4yj>ms66;#a_a&8kMdPm9Bu8fCpzUQ-EZ-Qf-U!|*km9BBR1xqH2Q|Bsl@;1fwt^z4sS+GV|fh4UgU9+obp%z)hR#(wRZL*y0 zuA+lFWMMm91(K7p#9hcf*U7l5sa}?&T2jOr+?r*nY9#v@;MOi}HPk6>HPj_-i1)0H zZVVJMm_8gZ5}8bxO7b+WGE#B&x{HdFr*YlXEfWGG#A|2`Qjn~hbF@}?A<;OBnSWHR z1Hp9#1f7ca77%Phw@AhNs80wY7s*I7!o>9I<{aH11lI|{buk2~l2$L$Dx>25=&g$T zRaV8Ls$vzd^!T$X?pJ9QkDe}2@k*oOl`@76gjNn+&06Y z;kJvn;b_2yJa4CZ+L(uhXrxVfScqmC$iqUk(&jwxLOOg)9u}gLw&r0Wx@cP-7NW{n zz$)4yvQb5YA{$k-Q)DAeyJYIpv|HNJG$d_l+GA9FMZsjIKl2z=JdwDXp4Tpy5xp4q zpD1$w6;Ra*N{P-^A+B0bi$bJIh7SffjT%N2;UdF5UR*r*Hc3VQ!Ud3 z0Sh>q472>`QWW8T@JY;mGNF$zRoj`r9??X&4U2-%G~jPhG$Rb2Xs&?Rl#2k03_?un zAf~k#aXB|Y6dF8H#ZW_0MciPER#XLKbmn3jORaCz4?!-%T&=2&Mrs;y$_Nr3$%z~t 
zil8|-wk4#-!`gBkeGn~M%B_ZSDqn6`BhT(SeSUckDYd?m_+dkH59_vIDe(lE) zpWDZ}%wt`M)vx^+qImn*YV+7?CPdqX^NP`W^D2r|*ds+4nCk&W!l^C-)2=GWqXO(i z6;}w(Hv;pXBDFBwI`l?S8GJTOmS>_wccRrxzaqz4Ve>0On~&INK~3xEX+d3-lXWnG zqFP7phas$j_?1&t8s^=v$0OyGPD5W{DJcQ6Ntpk-4g*3p%V4jPnM|>P{B=2K?okJs zYnndc$aS_;wj9Ce+L#}ZCDSHkBzGCHYTZvOw&kk3a16`3o9YZX12Q`T5@)hIwg)O7uk(9A)3JZM>lpr0$z)`=> z`{UN$rL29ZpnzctjLj>aT5ppLJBea4q6L%j7FOxKH@vCHl*7JX2U3^`!seVrwF%jy z6Ej)`v)x$y>i^j%j8$uJR_$ek%1cp*W=rgxFoX?Ist}355QII1Zjss2gV(kTOkuHbO-jvn!p+K^=~( z8_Q-41|U1?Mlm@snI$Sh4RU}z8&HsGVu@vn6yZZQ>tasgQTqLFo!93c{*>y2>=>iy^1cdWatbd*$ z>(?z2!5T`7AlFcptb)9C71~vV^^Q3qhS~RE`4GToCLxi0yz+a*C2Y?K@kXi_;*HcG z#3gu_aNRjZ%(Kr-l3U=R!Uza^#Gzen6}#Fz!EFS;fcfsgg2Qb%o7fJ>+QbeaD^R>& zY2i*N^GbX=&;=au5e~;nKo&=*D_WtE7I7#gLX!<5yIKc#5=~WPS67K$l{d_2*xJ1m zxp6oYGq1}CJA;L8@ZIgXan$9g8%Y^ZFd_rU^4%@M-W|g?Vr=Y%WN;(x#+iH2YYkAZ z@mx0qJV$GCgtpLiuL)7F5!B-2qGb=v85CD1k^zsMU8v=+OWPs)dL zQ1+Hc*$YNy6h9nE!YY0UD~wbOK?q}OYN?S$lPsaK3`u+-hNN&y$;iHqt|B85uiH6W z)-fUlW;_&n#izi;L)BaHHJxH)jF18gr)n8>5i zxZz+t6e@>;dfU1Z zv=B^R)A(Zlo?1nEvCl38X+tQD&ypa$*YaqhmznlrkM{LpQdSOwg?Hk|hJ!l^yU-`?3yrP)a2if9Ay&U? z;&ypSHgENw(a>-6)+&ilg`2}lN7N)13i0FR&*3+lh#awrC_2iv(d1QI?1zGC!dugnxqxS)7yQj5qGlGv3Qn^m$(R@+4Ah%tXw~ zQ-mXYd6M*W1{+u{dea>1-zvLM+zBb{EVl2RIqyY^IzuV!6e;YCAC*lIV^_uORmg{E z@Q&W<;6=)M+5t9W>~Pc<~B%>%F9e&bs33BfMjYTQWWs zbIbB>6~Refn^n9noOb<~21<@R^de=0taCuUIwZx2`>2=uMfTRFMTExd<@kh~q> zn;E2S{S>gDDqgQsIYL{w zUZ+Crrv&?{80@He9oFOZ^!kGJc!P$c^|;%EWj*epd)#{5X-IzW0fYQoyGR;3S`ckvbbr^fE?jjK=6*QQ@&rh0oC#p9gz%n$F;ZxVz~r zbojndD)r%;G>^mD9Gvj%ltA7{qE?J1r#n;57_14B`y@}A+Q|u?qC-55Z!%q`>q60B zFVe+=!QNj0CwT^siY>f!9RtRvY;=zdcs3mHWt!y&@Esk#ccTYrnl53yZ=}2VLArpC z(H_KiXH)zLq~#G$4;}_&_23~_bb2tSs~8W&>A{td%Vlg9j@a=$XwG}`kGdSq=k(lI z`JWFZbY3JBAyNrFe7$zJ$p2Nk$1IsOTUg(#{HVeck15c3OckrbV=70|9x4N1Yah`U z#BeR8hN{PvO)8}EO`Gf@=k?cMcpdS0OMmfMgnlWLkE}zeCt+U)_!obRBKe%q^u-vl*mxB7C3L?b;1NkFLO3t!+sOU;vmlTkwt3Kkj0zAgnhM${4}CIO;!&3 z32&xT9NA?unh}c#9@`6EqVOIhKXOp?UZskvbT*@%nkAco-5JSdAQP&$L5xzk{Sj=) zM2WK@<(a&N7Zuz(Nz=K#?Vc-IiBT z$q)i8H?8?o7rcV9e^tM&TA5qQz05txjlP4oqyK$(7_Yf_F#>3nQ|Q3!63?_oWxh(CZv~bA1UPAkBs>G`;$-w~obqb_oEgo10D(k$s9(6fdhs29B41g>Z zcc3`p)XMW(92V#;R{T#D!2h9 zOQ{PrrXV}Y-cqKYdk&tfC2_SLp6hx(d8@JBE%vkq1}|@}CNhoAVoPelNZgXAHN=LN z96pOMSczLA1-BGgXoMlV44{j4g!+6 z_n`GcrW0f&!eG_*Ao3Kq@*%o4@}e+Dg3bA==kL2nX#&a=HAzR*RlP_hG-l= gL_Ib&Ha$yIqtlm1Cka1PePQMyfkw~k27KxEpL9v1hX4Qo literal 22942 zcmdU1cYNH%mH*9ZWv#R;Y1P-VWxbJ`gqz%CVN<-uxL}(Su!#d$$o67uRcy;L1S3nv zcuh|rfj}S-Qb|*Ka=BD4X6BWdH}jL%dvs8Eo+rL>>&cUzyk3lnG5kcv$FCRTVv3)V@$vlnQXmw{)PMxu zx7Zn~no&V1q*A}-{wgsvB9F4d7h@E(A`>gxfNYSnojJfuSA{i?~f{U1Ed1 z0|STlc_nUxJQ`Y{l`Fic+a&4I7#Z$8?3KFJArzK(c{AK5EiQ$92Q&Nnd-fi?;fBm` z&(Xf)8L!N3Tj#dL03Oc_AM#?&TYC2m^lTkGJkYaYptnD>qUWMRLq|t?henRJsj4l# zM-Q5+y*)iW2Qnj|ePFQns@}Z=8Lxbb_vK{o_uZ&o?%Ci~_%m9uzjvg!2YiP|O2)m? 
zO;@5+uYP-WZXFs~zM{M5*vb`{1=N;2bP~$Y7*AAIRVQm|>*^aCn^H5IXU%Sz(@HaL z=Snf|rB8}@*|WC+NH!J=(l^vM;w9!3R~$PGNiscy#|B3Fyy})o8w~Uw9r2Qji|e&6 z^xV`pauB)&bv@eCJG8&2KXbEJA2K3tb8gd+nvA5^xTv^lICB&eqL|@r#Yr>uQgpV~ z-pr;+nt072Qs|NKW`$Jk?>mseaLf)F=o^MJ8LuUzV&9>|H+yqhizPV73g`F8YYpji zUW=g5y+=JN*LnoyFNswA;>lZ~C*OVtbYN#>tbDv=tYkbgRyi)lI>r-Y2|OeC7sWru z^V2O)UKbq`otB3ag~;B+!A2GX$s1!R+vv?t4CjLIn1 zg~oA;Z}8T59#y18zpN-(kAk-?x?Z)WIH1@-mf{_r(osxO*A`YCU& z*cu(jGwp4Pj#Ejg9(IwAOg`)>O660+>l1h6p<;6dko^;9MtNlNo5xi1HC_yI&!m)W zrsnu`*v_I^`8+^puIQh*gIUbtQDcg;X_joEmbjdQ{?5toZw#o$HC_^rrZw;sLbj48 z2`VQs(9#)b0wns?Shw1u8Fa3ZXzXaB(IYRN&~uxWZK$6MqI1(xt59un9<>3383ZW^ z;TLmqKF!k*&F9|CWmTG+E@lDL0#IF$PFSi7(iMj>sd6E@(N67F&8D=R4(iZq*3O-2 z2g!D)_Z@&(%{okvb)yh*s6(DZ=g38LPTFX7RxSn*^M_#x06Qj2)4%-7dw+i-SuU5+ zGFq7SvgznRmaV?i)-0#xs1deig{@ghD^U}%HLGmRYFdq&5?j+{Yu3;j)I@Dfx2-vs z&P7eBtyybp*3mlD%&;}**_!j|eAJZLnhR{rg>)fmVzy?ztw~cFHRZNugRQxUE<#O( zt=VX6E~bl76Sp;+Y|UocjGBb4*jPH;xWEUl zqQwALIJ6ei^O)9Rx{_%vrmHlqjs6=u(SC~$>;kae2d<_i0M|RTme5Y7wS;ytttE7| zrnSj`;Luu1&u3aoX*bhaN-tnq=g4bZ)xXwN{p(!S-vi_6 zNeg*>IwE`1C2}u$)C2AJ?PMR|>m7JM+Gk*u8OMF>lQ+;lV2u0NZ;PYy0NtP=I>7Uh zVfT>^9ZeI06aSt$ zEk|fr*U<+Nx|zA& z#9VI*=jtlKad35|xCO6_g6pWm^|(Ai$AK|iZ?VOk9HSE&qA}(=%3MdoxE`0|K%W#K zeNs5I$K|bp#u;Y@^OT@dBAvo;;?d6Id7B`#(JkmU!F&LK5q^uK7y9EW#%TeB+u>HL#|iIR_Y(cL+iQT}yW;DF8qU{1!_A)Sb-Z&H^5HDIUNY9@laU zT^^Np3qrj{(A|m!0ILcGWo;vOMiEC{WEHA39$06BS| zAT-dmbf1y}0HnZgh!miAKUchjIlaW?bia@<6?DH~tmY&i5cGgZCo#Erbn@grCbs zKnnbZO5tkMqbxwq`+^8 z6rlG6SG^!b@W{Vb5E|(g^jgIY0Jz~dgqs(v zzFvn82g|S5Gi29aPYQaSkZ%z52GQZgCeME@{0P}_4 z;`svVDdzE10gr!DJb*PkuHzQEJSsmb2=y94A5|;>fCYYwS)gO@cRTj6qRxMuU;6}$ z@d;Oqj|utDg3ugTBgV%aASXX52n}>CeNxE*0CM0rL=MnKV?oo zbvgY+$o~@b6Tw)`N&ZaG&vb;xLq+(X3qm8^f_|>J0RT7rhHxuT;$LvD^UUj){MxTr zmS4HD{6ffI3qo^XO|O6H0Hg9Zg3v(M(r=U;03Zi`L*;lM$lJ}+Z&{8!^ZFgX_IsA& z_pThj74i>)&>UDJ$L|~QE~u)9QX~91N5Hdia#=^Kf0Wr74p9YJu4V% zM*mNO{-mQk9xBTJSr8iO7W8Mu4FI^|H-wuL<^O^n2T}fi7_w3RuY&#}w+im z(1n=1lM8f4QA6Q&MGb{J6g3p?RMh?|9ucC&+ixjm5JX{OHMhi>n2w_V69rk?@r26j1Pk?#}LpD|QQikj(J`e%X2P2r* z2P4jw=*1EFvIv1PuttMG(*im9P=wGxBj_O|J^%=b-{Lt1>S5;bZ~>2(D;~fa9+1rf zIr&J0&_E;T5yb-lc;Gjb$D_>SQJ2Sk5&4P;VN8HEJTMv-$jQedga#Txk0~AizyrUb zJRWBrj~DQGrQ!js;qgF(n`j^>pNJ3|Xaqf>cmMzo{D$$M@;r3>&3BzV=`}>g>Tr61 z1Jp_Xm8uhvI&;EMhCFGEV)=PeB>4IGL7y;`${AEjGrBw+M=;N|o<~rVei=g-kz=+e z4CRbQp0s?4=QM(l0R@v8E|(Qxi<5@wu}u&TB$#cS*~Y;(?yyZT8f+7Wt!@;?Ho@l+ z=vP{~%e-ThKtvpKRHdI%?5dcn04Uaa5&bB^!|U>*RHZWP46TKCP>&iMfK+sO^?VM} z1y4~WtJ5()9Us9Mi6M zExW-r>_(IRtiXXt6G+RHMa-m`?3J1=0w)ga^k!QG4i?y_&asGAYVGhyl0`_89bP-q z|2oxOA?MQE4zE+rvxxaLzr*X63y}X$vFB}|X7;-cG@HFH&MesHHWG1NBeii|BhBNw zMq0piBK=o!L@uO=dLnNxJ1}$|>4-fNs11DFIaG}^5aL*gOnW_=1;<&7Xb}&uam^W`e)RMlBp z#SE7*!(|~1sg~L&XyHUUHgU$GzQl`6AYYM>Dw#lj%0;z%(FtrR&gAePbE$PGV77U9 zUDOgUIpMTe;?*d&cr{72tgR`kp=#=aD(JjxT z6s^w5^QoD-a`FP2O>1)ULYhO}Ik}#Q&dtd*PKVa!S#U7Qcr2FtEUZIS5FslT|I3?UD~6vyuplr=XUKc<5T!b_M`d!at2Jg zjLN1k>8LEFD3!9mgk#kvUB)I|1~q{U)?eb_R+)4RBgkUwMi4TfVEknap4Y=cuw&A3 z*Q67!Nmp9if+yq2skR9CBA%uii>T$NkPEUr8-ap^Q?6DP;w+Ng~O1WriKqj@-Hl&U(bj~R(AjzY#CD1FQr z_>}UrHSvXc3{yCb3``s+kjxP0i{@l-p{mZ(BJR9ht`$vqIkisE9Btxm?~RGOC}ZLQ zs?fyC9r=ujyC`Mi!P|KzUan2Nocpkt@QBGJ{7Aw(;*rA1B_1R@`Oz+y^P>|E4UcZQ zlB#KOPOhRFT9Si@sH3Ghc!&mCmV<|AqRyP`rkS)n2M;leR^;FzT4-es9-=~fzzSN; zx=}%0tQ!@yhIJ!O-8^)0I+yF>w3h4Qv`(A&jJ(;4KYa=&9*K;_kc=c zG4?KrTp{8_LH+3*C$4eJBSw5FCQ7V1N>MOPSeFSbQ#=?%Z4v{`g9O#`WH1zZ;?aZ$ zH9*H7KZCK4M$~PkgdKM%rMrg4v=u64K!Vlxxh$ds3Pvfhh{krnqbRE$YacI|p`?{i z(&%$j(n227m|X-e@|2OPWe8R+Sn=jhH*N~KI#WLlstwa(`nQXwUu{La=#lwtj84Y?;PubFcu@Le9~z&B2wRf_qYg z-a6yTW|ZF|M(CJr0W-pyf*`a-c{v~;PJlQ;2M3rncmA-o_Fp+2Y 
z#wUDzneXedCORpG)T-&EILwv;*T;sn?Fg=R@g2ERs=!^hJUm`0rFa}V4yQ;FDuE3r zlGT_516EQrnJjdTpW%Dg4sS3ytJ7n7Oq@~I_7Kb(RHIRldVt6tyBBujXC zt6j>Z%UcdkF%y%fT;8VbJx6m<3*<3EEG|1kM6-&L^=k6``-qd?XDnJw@qNZ54+4kp zfIQrmCXbtn{pPx35T*UQjJ$$o8FV8IQxSRI=1(%+L0e7(Kp~M}Q$EuSNj?f$27y+I zE{+~l-Q^`z5XpNBF{C!_ZRWi$?kPf9(_UP{(`j`>5*YLu&}$NhKir>ezz{Y#S2}^q zQUh`d*7yJ!(Hb~7!5#Fyf3fG1jeb4_a|Y)iQ(%)ylU)n|m!(ve8o@?y zy#X0EDQ3gRlYyuv&NSgRBri+#fT2r#GtyGvjlrxu7`suXY-gJ zLlO;&J4)Dxlp_JsDi?wV?g3gF3ji4!^KH=#9M?!RE~1zAeAj^SlI_mqs^l)x9#LG2 ztZc^8yI{5aCga6ozSR@fkJA`axB={4{kg@=aDVerDvt(_G!1H4e-konuzC`lXv6B8 z%8@CoD-`z=r?r!+^~FWjZrbwtGO`Y^T-N<4OHxh(S`=zYDv)p&Cv{8g_2Oopbfb`Y zVs{_7rL(i;w4~ZpOzN(fZIxj=Eq67AyI^;P*|K(_#w#DC<`g5vxl#2f;e;hmSyD$C zK%5&@52P>IvDM=N$ChHp=Ig=~Qoz-jOW(7tHvuk7b=qkeZ&K%b6I=k#vNtR;3M>zK z72+lNB6vfgCcWA$9sC^)oNC%jHV~e&L`HcL0ELWyH>VY@)mJ#S4@dLcu`kwYuey|l zpi*Vb&B}&-R)rc>ed!!i#mj%~x~K42Sh-W26)r&)tgpNRcYTznQr3?B@s`oCM_kdL z1{b8CshYW9s+bF>kula*130DZa*bj{G1SI0A`|C(#s>C`b^WrD8f-X#>$hI534y?P zwG{JX2r(pN(&`ZckbtH-ISZZCZ zg$nJQu)u?|CUeT6oK=lI;Us3@yv11pL1x@+HS>O+l13qQqY$l`uJ^_s9sbRll8Pbu zC>29$bqrCD681SfyZ*Lb#}FXbdPtj46~~ZTs^J)dEx)feQ^b&9;Tyydf8mSweOsB% z!k4|&S-myR(WsHtv`MdDnj|*Vpr{%8+_1H%As7 zc)GB^UxU%=&6(*Pr)tY7GO=Nt%1`XV#-XGXE(p8NQyaW12wC1;W;bE5PsX9(5%%L1 z{X2L&s&~(h4XItTq0VuyQq8oE$IOjaD?WW}vn+j$1$`upl)9-5Qj!F+Wl2PwNYO`L zwH#S=nzP0hwT419IR-(MIpzl*z|W!~)>om|qrtot>Phg94OJ;;gXHgAV4TL~h3s4c z8vf3OkAIV_1}Z6yAl8IaRL=yl0*^f_>+E7^mhZ6<#Ih=g6;cNsa{!C%h0Y3i{DoJZ zhnu5d|Clq0Jq_0fvqMC7cF36>jm7Lp4kISC6Um7a*yr#zDZ)FeEI#ewGZetV=8Qks z3gD=mpB8d~Z!YRJ1r%K=SV#-`RM(@0YSD=9wbMe`!MZn(=2Hiv-hyPcJV&QH{H={# zgn6}HfMN$4X7K35V&rcY_oLO2#DOlJ130#b(9(Xn4Eokdf|mMgmgNP#!$ZU2J39mn!yDMCGkR-8C>W~IfeSK$E#`ZOgriiT$a*VH8wgt0y4oN%?g?h z;YaJKmDPVe5!;?Nfeo}?UWCRQ!C|A58t|CI#w?-Ecf=cg4jY-n#t;rbUF@raa#3;< z=5sUVbCW-xTi~+r4H&2fVgC{Z2S%~Yf!Sqkr|njwwmS-?vCyX-_;hTC&*4(qjE}-D zm6y{dc?EiPg)>)|12S`UnJwD6dY(c_ybv~5SJHOc1ebNyH0Eq4NbCadUCx{Vm!(}< ze8^>8)-HcUckzhs3gJsTX&cYkPW62m8eB~~<@52}4eGlK=WBPCQ0LFrZlC&YroKCb zI#4gbd|X4<>&-Qv3aZp2A6GC_~q?lgjuej1SD@0fUa;L;4`m+=zU6LiR*o10Brmt+JYH=i@5=H zn=tA7xIB#}ahVf@{|h!UFTWd0zZf51i?M3_<7HnR8!9Y4yu z6Tic}^s91Uwp;8*tld^S+ODzCbX=E1_J>z{wfWnY$&$ACa{;V|xUB?rlC_u#><8?& zr4A6^jH@%5dffMHd>UA!Ujs9RXraNq^OAx2ylw1(iK|OZ5{(jli(08z#?O={~`c}VGf27 zOFBD@Er4LGJd(3*(e#aLaLq4dNUJFczuSr=AI{j|jXB%L=W75&_oo(G!eIU}?VkcmfYZPAPt*GM!j_8KcNN$Nh#jJ(?T?@ZR%KQ1YS zubjb~d=m`VH z^I(A$bA)3@U9NK1e%_07(|n5K!&Bs@gPb@Ahr--6OrN*^tkWkE1li_sFvw8>IUFSH zc%Z8=Ge}We-Q|Qe>u$A~4c!Ri9k4Q+>uQ7#++ZYx4`x&cLfHIULQ`Z5&<>o@xQHmc zAx;NaV3j?KvAl>9+WFV>TlHXrq>*LQ0Ej(hgVAi=D1AR*35JeOPVzU^YFraaGCF{{(!@+(JalyNN5WK zWVj&pkwyNaSkGu4j(Hapn)3%uy1LPra_FZKc}pYbY6t93>k%%Kg?}2sYML-(ts6nI zfPyhA{xpJ@a>{IZf$5uq6N!SC*qSxlNv)aL@}Gxdzi+KMpx{_DUndjEL^DDp7x}m? 
zlB{j!(1)1)Au&P>)TEbEGA zuZ}Go*2;hHNyrw^Y;k%&8;}Wmvux3f1lLG34xwqn9_9tQfcv2AFI0i^j!ygdz4sy> zHAcp25Razw;Zdbg^w3lHJVg&~#6H){Kmi3a!IPZ2PsSD&p%s17Q+HPMqDw+WY^U=r zQ1B8fzGjPn^f!D96i_g|D!<}hBz7vz6}bf+i!zhl=*NV zKox#CQ0t6ehyQI*&|G~u;DUd*-v(hzm~i?E-9Z$>=dvaW;SZ2a6@`L1KpuLNUgTcP zW_xkTdb&6rtb#TH;zjNz<9l@@=tw}pnC&KHvp35vv^j`ETVcnSxOTh^FK!2i?N0RA z0w{eEaksSx|a4G8a&)L&^yw1Xoxby!-s}x>`HAO2u&W-v1}LW8|S$9nO1yE21^hHqx5_kgOv_eTE_&dZ&dojdQl&vC|e7hEvO-7*Z%mb$j9o32&{F)hA?<6CMJ zggUEcA4G!f1ic|BK|&=a3qj-uA?TA-1O|Of{{RV!@I%(`+JdHMniT`#NK5_AZY$_|YxcT3dvW>o8&ccy z!3@mu(U=`iBvVd0lkLg5`9iT&u2g&b!k?(saEpl<&BpId*7 z8)f;C{(Nisn0RWUF(#(Q8lw|pe8cL|#?J2p&(wA#{klRI|= zR{BG%#y9K=tU~pUuT6^&oe~rC-i(2c-D0z6H*BAp+$tslm*(7WdQ&s@AKrIFHjHX` z-?6=WSp`9oUR@zC>m_;7Nb0?-gKlD^F@1PX+EUuZFZBv{lYD}=EV={_$4CZFd&votA+&ZgQO9?ojq!TrXktUej7p-ac=gvpR zH2Gs}P34j*o1veIw={hHOW7zkk5gQ7yAp}=7)2>2uUSvDf_~=$37(`d9GeG(b`YN? zPf?0Hl;mkLc_y@;GJ=g8>pj1e#z<3^O>)}ZA;#7sNVURY%om^GZ55i(4+{CfPCVIJ z=UK`UyQ)%@Mc$Z2S>8j204Tk?Abr%+Jvza2@@BLU?{eX)iN3ChJmmwQdEaNA7Z%$T zsTkN4eVZaLEw(9BIj|}FHf3H}Y*VFbU{m#Ns=Qa(bmf`%QD0!w=iBsgB6m@gg#}Tn zgeRz=tp#3&`xQ zmwjIjCU@(C&0R>oq2;azP`3h9MF0#p_bB(GFMOj96OI0am2n8J!q zs8D+=_(BXAM57NggKS_mMsX)i5!}b*VKOOScgio#yw^XP{z?9xugR^Mt>66%ONn`4h|( z_x}3okCS!@8q@1%W5i3Ju#~6_01LeiR+9??2ZIFLB>vWp8Z8883TAVXmrKB*V{?v^ zmf?EKgtK{Gr!d!~WljGCIhz|81M;n5=Aa?yO@TPCVP75l>Vn$)loV6FNmvVTjZqL* zU1&{Sm!IaAXKx3@usg(%N9x7=5Yv^wW3#Vc3RN+(QKke%RuE!n03i)cRFxVJ zfi^Xg1XL+%BzO%Ao7qC`IIh9L{wV+iZ29;u{Xs z&{*nR&<;!jyxUAVnw~Hn;MSR+8BbGM(T1tLWJ5`hFCQz zftR3mr7;S!oKRd!C@#sd`kvYy(|Z8|v3ZCacuW~e{AbSkX$`9v8~Hj|3H#gg2vcJgZbRw9VwBZIiA3(2po?PAMD#^B&AJNR1=IzJ z251FA6GP(Wnn*NJgHaO_9!!n#NlkoEq7S@~@Ti6t6XW88;RV!lrbB57W`AbRJ$KGM zKi~J=v(NnYbe1)j_j1MTLUyLfzMrm=D{w{pbg+R8ikeDTvMo_Mc=-hz5o zEBJ{w>7BQkJa5~{D{r9TRFu>M^_}(76`%i3^C!c?LVS$Z?uH^jw7hh%Nwc)n2 z@r*V$6}_g_x;w0TbD34l=b8tabIW^PTZnP%^!Ie$2b7bh9Zj?)YcQrDS7;kc;d{%` zBoj$tKoX2;#)J*_4BNyQSduRaE4CG1*dzMZa5va!8A~m&C)mKIp$I7+=Qc(WsTz~4 z<-{47LmTYt$2*{68OMwR1Fl}NYW!;Ved}SaF56S(4^Lw1x^^_x z=J*#TAuQS9l&3mvmem&9R!=53se=hG01BZmO1^}@Es=qY$U<5;pexUsP8z}l159-- zCDdb%J(YtTuya&vj!HHlC-M-JOgiEBSR&*Ht;51o7cf1a{(A)|L|+$rUl&9%bd;bJ zIZ8c8NsNS!0D{O7^c;a04IO1DM~-sOQ5Iw5Xjh;TIVwFzMO4ARCCB1Wj#SR@4g*F8 zb{L1j!=KTCC3XTA#s(LTK$T4OrR}Zr1po(4asWs>W&mkyztkOrfFYuuB694jYD;#$ zdnS8``)4D-8PUWNfEZKz>$UX;P9TY~A#Cvm#y4ZW!%J*4F@Ae46QC z;owcM9TyUKx-R0NL!3^3iwJ}m&#OzW^o1!Mf!{jGKsr4IyuGY`zk2_HOBK=}Tw6Mog@K&UCNVE%HVcScn`;oTSKcz~%sD%RD`zGMB8EBa`p|B6Xb0P-N9f35r5<8t--SAQR|17u*C=pRu^8og%dPdfrg8EN1VeXlJSPqAcZvob zGGK%lfM*Z`3_1!KfY?U3DLGPC-N zycDu7C()xJ-tZA4*irsB#8KiFhZZ+87vp87nTHQ|8rzD(84k+B0)1?*FU$Sr;#v#* zRw$)C)}s{-%BMm3G^iL7j__Y90s@n|SL_kKC{RtI(mAs%s|x zuWjwyv<*cF+lFlj`QF03kfUVx(rPdVCw0@FU?plOLh;HzH2n1M$9nlD@CvilskO6+B@2?I9J^yehZyKsFQs;? zfZ)-j8YE&Ux#6wm#KN3H!exkX8EiM|NS~pkD#Yn*)Z*SI)oFN{EM{v6)D@A#c*-hI z)<`ZE1?q>mnMv;g$C^B>X*H?-YD{X!O;w$ktbJ_V?mo9Kyp0udR%7;gySs1V1ue6; lxwN!@e(uno1H0!I7u4$Hz3Q{cZQYCO=QORktgcKR{0}T_uOI*b diff --git a/src/wasm/Hacl_Hash_Blake2s.wasm b/src/wasm/Hacl_Hash_Blake2s.wasm index 8e69e8f79009ec683e00f949be0a3edf8511e7bd..814d98984084ca3d4daa6115489ed1c01751760e 100644 GIT binary patch literal 21925 zcmds9cYIvOaejL^5C_}=I8>q$oCQD-ETAYBP$DIYj}j?Lq(s_Ob%Q_(q;LQM3J21b zVv&?&l}L_k%SE=FII-iFLbupXl&UzrIEj-uy*P22)0{YV632F&@7sNE-`?E=rTCxt zmp|}sXLjGr?Ck8!?(9AUQ{_`C2mdv39tX~1d z`E$fYRiFW&o@f?Wf6kN|h_OZ1ns?0Grq;ezLBmM09KNEZZIno!kd5WYeBohqw7KEn z)Z)~s>AAX7)2A{)F@HPsaP-XS(+i7BonCqJ*!0rE-1Nk@An=>?r!|>8J#~C``tCsa zEh~sk&CQ&gp9Hat-=y6qc4}&FZsB-P=QpUJVFlW{CCK_siaw2{#i`Rly&ZgTJ3 z)a>+@$!izp%S%)9OXVR`bzrJ|PgHema&q$I^b%;FJT-O8)Umngpz#v#>*d~`Icr{? 
zJQ_5`BieNL)Y8->_%1Hhoe%2XegVV1GGAzJX)U(3cXW1juk7jV>mOJ(xLWjzce8fI z`5=5g$m1#Qkz>g`E~tFNhaN$*?(S+S%9AtmGfP2Xu(smNX^1gBdFssE(oE30YS{*J zQ{|w+$!DH0Y5RY8~{hT&79Tmm-B7x1c|z>h777 z(-^FQR5LiS5Ufh6II%cA9So*a9A7wnU$A<0t!VdflEhsNWc>@Y@Rgw(t?6UF)~#rI z?p+VU@GpJ{mYd0(dvoS|7>oshZVHP!FZp6apMk?-sOG}#oB>M3Oc>~#Zj_ufPM~o? z3SoeP4o zV89(RsJW6ZN|7?QNt9SqwWS2T2 zQm5CYE2Aqn&xhyh!yvC$N}VhBNVjA^K!Nca%EiY~&Z4|>BFN#V zXCf$w(!CPsK8!^_BK;tS_Um7t10p->UdQO3!ns4C2Vq3Q9d0O=d?yQhGfX;|B@y(EW=j+ zL+s2DNDg_u9|lDI4@W(=jZ(ysVLc)vdX0>P(QudaD1cn7hP42kPR7D7e&ds0d7#*+ z*U383VNi-xq1U^b4YC0>%GGRiHRCdlnvAR2lYOXZaW(s0&Gm9UYFb^*0cn;^ z@sQjAU`q_#2w)-x4$2yU>m!inx<+o`tz(%K!raTx9Q#=sE(`(xlGvKHV@kJef_Oj>K@2x+aAmsnbR z;x}H3_SeP0Q2+;G;1(GJc$G(MOkPS_V{(+V#^e^#8qv4+$MfWsfM4dpuR{At%*sj6eY{fNC9fpLx{p`6Vo6WQT^5olj>jb3 z$7ITU1?m{69;={w98`g^RBv^~lD=DxTS)FE)nlZ3EQKm$e}yOeG+@7PC-BNia6Rd9 zo!0lrG%=Rz30ExZ8M((oGDEH>$@OF!*J)h>dKQ%7sGU13={YG8V<_uWaw;sI(D|~2T$Xbd%^ZzkL`k9lm%jJhfcdsfL=o5poX-v{*lpnSha z`9A#}avw2Pjjxs0hJ~|ubV7{}$amNpd4QzvC+YjsNLSjyIdsVHAeKV`ggGEfhb!NZjdUPlJRSq8Ty<&u6x394Pi z+#2iMO1@X=_b7Rf8Vj=e`)who-)jp6{XSa=^!r(m?>CH5crOZBV%~>Bj)nK5P>}Ca z`U6Tn0BSk?L0c&3AFzc$|DY{o^$)QiA2ifZ_yI!=g&#E3Q1~H3?cGZMu#z7JwSvBA z3xWQSEoAkFZ6T*W!h&2h)KK`4p@zbT4K)-#LTXUcN1-wOF@>o5BMM>l<1_@|lEC^2 zV$dMLe3I(@QI6P;`XlxUrGHEbniFenyzs0YFQ9&$Jbt{2 z$4?j@#9AJ=n-->A(m$yL^|n%e(y#yk7I@aOK*v5#I`JCfr)oO?(|qk`D8|qDV*HfS z&nrQ5VyzfI?Ez)|vr5pw*2>QsIRHQoJX7QVy-%^?=g8^jd`_QI`sbB=N*S!-q<=xl zFDQm&J~0h`Q3;yxElh)7G`s+S7oI7+0>MBhef}l(xk3xS%-4Q}lKhG<$uBAWt4h$E zSlj1c_JET9H6>_ZYvtFB8~`8(o~d$F7}c*+jta>>&DVZ|a{Pub$FD2>n@Z4}SS!b; zJ)o?AO9>j-TKO#_2LQ-{XNnx4_Ze3FHaY#a&*?Kt|BjN+D1)`S|6L`&s~E@eWFY^n z5;Wmk(EVo(F96_$X9};Xsr7s8^T+sdY=OVe*ZzQ#{DCjY?OU$K>(HRXqO0@F3RmxWlwC<&yqWC8)QR@~4Ic0IGMkewUW;(gEgG=-zfPT6WQ@(WdBf`3oLU(*LLg4Q#Faqmcsu;Yx{uS(Fs*2=#cIRHQoJX7QVy)Uui-^l6Td`@3d`pZhbqzu;T z{wqqpVxl{qjPC!g1WouBbpP*$7Xa|WGliEI-TwpqO``jM5^~Y~t4jVu>HkvlUnYWn z%@zXvbz8{lZ`eXk|2GTrHNzN%ucMG9<{K#FSom)g3i6*ye^bdfO?1DIF@=JD&=vyy zkS%2O!z{>!%tR2d@E{6V79K(&$HK!X6y&Q)zb+%M%W&oKh%Mywqqb1ckJ$ni#4N}o zh8hZw8fqv!W~ia?xS=Lgm!xS6%jb+hwXxrMA|ru*5|0G+dO|J@^<9LV7N5$1=o>N^ z)i-3koyT`(^cyn*%EVgx0Zj*#^_wz+1{NW2GU5Y(ka*UPDNs+7$J13j-fVafYk5F6 z2bA?&GJ*ycA#X7}0DuRcsXX3F9&hz|Jdx3F%LsHrtmOf1IH0Vb$p{))ggj$-000j> zQ+d3dJlB)B4JU!VKBDS_s8e5a6N54XPk1)4PCtHw-7ji=`erKSV zm){v=>`xt$oTDv!gB_Tmrb*HMv`LFJO$43zX`aB*O^a?tCz127)Gaut>K3O0Hgk;p z@~kdGw?$94ZGd18(gyMAENr8+PV#Iaz}bQ~XFL5V+vU&L?kH#3gcScukk&m8(JQ@l zH+>G#Fa6xB3^>FpS;g(fphK*d)nh?Xi$iG9V>mGzatNfy$ATU`;t*?O%~&v?N2QA= z2LXQisNOCapablZLAt+g5!Q9f5bL^Sgmv9AYPTwDp{liE#wDuS6K6Fhtzi%f*HJhb z=tMKSrs81?QswOMdRecL8`m3=dSrD;Hj>v`PN}si`UPqn496=Nu81AxrV55VXqGTU zHk}MN%Vv%Ax!xk1Om#`Nkl{EPj;An`cFBa_RQJL!;~D0bJQm|Y25bEaL>*Qr<+4eL zwR;s9O6tx>tqXEEy2(}J%#n`w=6F!7=Fw(&aAexqqpkeO>TUeV>FxX}=n4K5^$z}Y z>MQuuqp#%8fZi!RvbC&tNuO*h>#J~fv%RdZ#!1aYS??B+9c6uu49OK`y+=mm%CZiT zuijbKd!<8mQCS^w6?@YmSF-~hvYWDW$~CO(ls&BLl#q3uve#ING*qZI|H;R!8=Q#T z;0jFD25Gp&RL$yo$x1y~&|liA+Q6yW0CNC!cW#&iL_5ksHx$S=0{etQZi6cvY zs^Yz7sU=r-G7R`e=JuEoYGb$b?uR+e7!4&-hH3Ehs6%5@}T~ooZ2hEbH zIx3@NxK`HcF&TraFb8oM({*GxLWUzL45iVdiD*;-VFR&LfiP1y>GXKuOUbkX3o`}SLaql)3-5vKDdZQ#q&YM zs^WQsdb5d6B5ng_Z-eB(&L02&@0S$`h2;2O>NKK04*WAUk-OEzYO^qEam8q<;-YvB z&1@L#&z={W5QYzWYA)vM4H(2=zE7Ib;V@gobq1&?p^^Uv@BKhUS*S=wD&jiOTyc6h|!ihV!w_L}NZSM~bwKAVNYR z8k}Yf%nB&3em3q_tF&TO)`Dvo^|~E&+daB%fN(v5q>HW?@nDTc<60hB3823Yqb?3t~j-;CHjdVzq7jHMwJCIDPEMaN1l1*9;h1o3rGKEq%D* z5$w07h)#k}Uh0c&^)@EJAPq7ME)tuswt;s(Zu>dvVk|5m3bOIw7wc>^tABNt$S3m|48{1M)_j^e)t+Zo+_5vGFAx3e8}hhc`?bVZ3UBHS`biFw08v z+%1?ln@PSI1* zRkg}+|5B6!y=sxcyC6lFe+41lND71NxyHm*AJ`V}{dB}4Av9KOmpqYhzesnX+g;w( 
zC*W|&2)h-CS~Q{pilptL6Tu~=S`BydR#p*9wvLNCuz%{*-SI{Z!?+S}^nguID9(KW zSCXERbi0)UpomQm+4Q8bS&7R@42f2UjDlF|I11x|()|^1pa;B;V9Cu3M0TWLW6P;! z5X;$qS~adFaYS$piK8%x8%e7jbI7WtLYS-u!p*rxluZCD=^>1X$Sf8=LuT!<2D%vb zENui(XVvkbjhSvf&4Fk$-5AKx1K@uG%Drppk{(0zBH+VZhkmN|f2S zV|7H8GuiOMMBygHOi}njvj~B^ZGuDXfLro282L}X&Pv_xyqah zfd+vdP&$OtFc*6*L!~wq;!!c)j8vBOW^|VMXwYn(oh==?hI1LxO6 z=NAvyC8S`e@4yV{2nU?U0S>n{tz)xk1wc`+JLqxXF>7XswDbOE7)Vg;)VQEoJrCX! ztq1BNi-PM7gJPH9SJ&w*BRgV8{;5Z;;py1$FkKK$-9+G;oqJELIp~&35LKImUF3r4 zxv;!Ky>-d-bomMx>$6xdm+))|fl@HGAbA>cvovuOxX`L}t5|3`;m8cy_h8EL4$`e+`QdfjM3<^4j(B4AdI@9K==%`l9`d3%a3vWkd8`3Nau3nihtiB) zb8}S`En=d$HbH0vq({8S4FfO92rj}pg8&qfUUqvO2Guj?vrz=eM^ge31OyZFH;Ec& z?iox6#*M`Mn<`-Jk77+#o4*%*u!W)Bb5jJnSNLh91$w#VOS}v}DvAri78_-->XSfP ziz}<6%yW;}$=;nz_KKd_i68eQ6HWXWR_G}mK@20wv|P^;yW>ikUE)3*UE*3l$DaKk zhKiK*Ih0Flwnr15@Fsmu;q9Ip_6FDQrXbVjj8JWnjoC(N=8N=stg1^)7rG(%N;mOL zgsMcD2!)tn+>?z^2o8);9X3J%VuUh3ZJ>wHgr%>vGeW_iy$C6|ita{FyS)en92UeS z#+zt?1Ssk?*3%^{s#79V7uVM>=zNhQwCTMb8gW*7c+FpWVZBu=yPAl`d0FhOj%W`) z4I}TgX$qz)8t~VMKYZYJZnz3wolJ4NV~l#Y4!NmD$z0rfUK$^`4O^GGdN?d>Xdl`e zyhIxJ2De4~nF#xVJc3|`UcYJfChm6a2GMgGW}6*RXH0G8w%g;zwNOkS$3Tw0#e#k- zLSkziV2Yv;=!)20WunzYVwLR}{3tNM0HUHDD-05hfx&?*ip2fI!2$6NE(BQz3}?D? zPu}tNPRZ9P@!pA6Y~4i4-YKCGWm8u2Xsd`kUpIDW5Udwr6MLuVGmm6vX8e19$=gRs ze{6;L%%fyJ^Qfi{I?>@0(j7MGfNwn*O1;tH=N$29aCBCCFl|9Z+~|lmIu?u3(So=Z zEd)gL#Rv2dQpkbbb(iq10k&AffRpVZw^j+9)oVhfM`=ML$O{i489iET)obl0C=S|s z45Q-&fq5o&65%1rI?2eoS+tthIEKRSoZbM`hFQH4Rx>V2HpFJMsY*s1CV4UidUFMw zMV@wpp}R#k$Y!}5S{auu5Ob@nM!I$ra<$u#sNJY{)a{WSo}-)q6q)39S9B(Mg+Xb& zkYro%(7t z-VF}BtCt(QOM<#+BFpZW!)|ieox%aAYvMsQ?n3XusDv1mJ@KgQh5NtPA4(Sl!@? zZmbR&l*S8bV|A14mpyPFH@}E6I}8#>!25_dX26x?NC{tgIQMZR*60yx^hgR{IV}4) zW{2fwJ6TPw!nHUlT)ak~d| zJnoP?+<4sKjfcfzJnqEJ^VnhBA$P)Iyi9Hb3zMcZ4&&vxBb_*i03V_Fa8|z(4&zl| zBCo&~Aup5L``z>23eTJ`jNTP*uAo6Iqc=iQjljupW0i?xn}Ih&Yq ziM<;#F>Y~Wv9R1WoW8Upk&YF#I|<_2oIFw&_9k`0Et6dhVOX#tatnJdl9<28@wUm{ zxg}GwHHeljNWFk9&b?n8Til1Sd3`LJXQNei!WIi4! zAD3+^_7m6{U}0T>!bU<>p-f)Cmj~}T)R-@Cdy&j$M!?PkmnuMRf+_B5?11!RfrZa! zp_$dt>}pT5gMcuTsoYhr7l7*+Fl&mY>&3^5)R?h#&l~MHgs1yw}JR}NU`0!pm{kV9@7|E zwi<|EQq!`x(}8YJkqD>>(3$Y)Y$4?6Y%@?yXM%JlQs~HLTou@%ufW>sN*UKXK@$r? 
zr=cqWMFHgsSB$j98jZ%aX1F;Np1k5P6hob?&G6AM;CLi6xO(Kn3U`ov80*M*>OwWk z9rW;{;@uqJYvgK9(QD)?ZuzglYH^RVGnh^F9@+dy8k~MKeOtJCD6tuuZw~8Ru3Isz zHsfBU>jj`l^T_byen5)m!3 ztzm99i}g6-hdgyjzWmF);)P2PD-cr$BPA!y6K0lqvOvsv!ZD;(rqY;0M*w$5#2mU0 zUm}N|$i}d`Ob#865<#_<&gJ>QPDz}8Mh6*471HEeF;cDG9ARReUZfa}j%`$-jVK86 z*f)nyTo5he z8D1TnleUPR{gqjE&h|=_3C@)$6P)|)zr{fG<)M2&jt3H3iA^}m5bP`}!6MUv?8cxL zXh5TjNL#or3ZN)BaDMVE6FGh1)z!OWNkF&GDO~a`To19x1H??Ry)P6@{x&C0?W!dqW-o)#%-bkV8^((3xOmk9|6%-1 zdt?Q+QJOgXCpSEs>qu5DUD6EqfNL>yZkVQ+;qlFmWWs0oEqyXk%@F&`geH7c1!V4F`_<+iwvX`c zVf%!ld)PiIzK30f3;AEMDwvvk*glWs9yV_32T04~=iB-d3f%iHRCGxpu;61s8(5G8 zzeHe3#z$Yrj4=btwVblo*6l$d2`tQ=b0K?O6j*E{*iS+snzGj+uf7kt^nFQSxgK-) zfIo+Cz>7D6!;M}**iXm>gaZbO1Hz3A2sfs11Ip&!lK^$S9Q5cg{SHSF1t?_YOD6AL zBlSLgNDk?nB+xfk=rYiUA?IOF6E_2jR^m6gVx$$;Xf&?X%Dyi?VcPaen%4Sz3_AGxJL`r>5n^)XdzO#p$N~_IGUPmc!Q_-YGNqcbU_trstRB Z)YSZ$sX4*F(_C1*kI>XfQ!%tI_#c8&M}`0Z literal 21136 zcmds92bf$(k$ycptDW7M-JQ)@TeZrTR%Fe( zepS`o6}qc?*2@(}mFIctD>oiF;_+4LusV!KVruFtHKmU8sF<26uCD~*u%Ln(kN_Z9 zU6`S(1r?R-CxXI%s5D=>Pc4Q)6j9dra*Ret^g{lLAKr}q)mClGP2@%gM=C}KM-yH) zeKESf^T7D{{)x$Er#!H0aB~01;K~)A=QinzYcepN+dVwE$5U?0B4W9bp?zZmAeL~O zG`hq_b0Z`BcY76XgCZJ6pykWFq}wFx(wLmcjeC`Dbqs~2E4>+RlOC7C?yCoP4-f1* zuy^m^#6V%_xCqXIFR6%wC#tyHu(v&%Y~?uKKMgMz*z-RbgOg zY-rNU%q*`sFb-aW1EU8MkvyeAH)>QwA)_i)8dNv~bS=KG^|M>T?=xISNnpSo zGN_R}*HgX7n58T?Py;toovzMPqtLJQRi0~O==(`-q9)s=19Z9 zJ9*rJIp_puOo1KD4nS&HGjF%WKnkg0GR8z zPVf5w$+-6gJvNPE#GyW(MYDJ|&GLh3&hs1qsc;zP0_P}Frcoh`;So3@B?&8DqlTp3ytez6U}@(?%&z^V{9m*xOm=+K%& z+l1B}I!9>Dp>qwb)59;$Lwi32wgcD{0y}6fz*8MsbLl*xHJ7#vt+}+r&^jah;(WB< z7y{1&a8?NHqI<)4|`9fPA&(0sRLhu_E*9xuXNnUvaFjpMq9hk(w5GBnt_!yz7~A;DG zO%06->AaB6$C38vGN(ghB^}(4PsYJ^++jP$*U*??Oo#T{VxA}H8UxXUa2*$}xJ|oA$=&0bg>@XfDXAGJd97Kz;?=EdjlU)bc3Kw zj}F^no^MoyN?k=aD&agOoUxLM)AO4YNZ+K2Xx}Wf1!ss~7?tyUiy~B;O1edp2LSqb zmNP(CT$R670paTWZSv9WBEs#i2)8PJks`DftP$ci2Pp6ziqOE+(j8g~0FVOD7%4#S zPN}#{INjxPx>NCs72T;c*39AEitbi^>YzN^F^%^qLKFE4q5dAt3jlcG8N;hs8($(l zc8B0z`RG28<33l8mngnp5n2n@^!Q!}$nyh=(7@Ev16m3IkOI$GDO?qLP^55;ucs3J5lwe+Z#0sy4IGe!!~drT@`Dx6;Ga(Ybh-za)a zX{;IdmnnLg@~aNYvjgM)az$t&Ut!!|u6Y3fFFa#-dC}7973glXwtA&Nc5(G8MXyl& zYDKSB^SvZLZVD-WjVWaKwWi?l>!d)BYsM(N28E*UL$6f)dPT1XwG=;L z3K@QbDR}%wQ%LfYQlKX^H5A^UsiE*jO$~)7HMLhMev_g%fm(*&YziJ9HH9R<#S~Kf zRw>Y%H8m8DYHBFFMN>oJtwIgM^tTu>ew#u#{X2zt`gTzS;G&574#A*7gn6e7?^7~k zPq{Pp4#j`32+aj+bmN^4kmq+PLIYDv@6w_Hz6{zpY&e_(O_5q$C#06K3#XMQ9>l z!3;jEc>w?~JY#rygo%#&{1NGM(I|XWKKqzR@-bJEk0}0UMQAQq)8~&mK%PIY2n|dv zeO$`{0CM0ND@T!1eM00YlI)Z6*{4K~Pq}h@Lh-*SLUX|yIX>wC1^%=mG%&UFX)Ol; z$bn~!9H93ZsramL`mD?8Gm8IJ(Pxy#nz8?!qR%M_;CM;||GXkJk*_fJpVzzqfES)I zyh=>#3)1Je$;Y7veo;RAl1TC;SCTI%{x?NvE?Cp&FFHV;zpMxiOf7v`%K-p#;8`vQ zP+t)qUn$}7@0y2T4Uda;3ti6hR~4b&RMJ;93jkn&XE}>frTUt5waBl0T|WDUi17_q zjISyFrXn;KtP$hu4p884DMABNOW)FR0Dv5L#>fGB-dcUW60RS&NV|bM)@b{(9MW*!w`Rs=x$q!vgzOVQ{6`{Fc zO`m_@0D1nAA~Z0y^dl_?0LX!7tQ=rQ#RD=@%}irxpKF(bGy}&Dj4+(XVuL$5W#FGm6ke zzQWi)qj>=UFFa#-Inn*s=x-F=eqX zzt@aW_yY<_!Tb@0lobAiLWX{$_|J;|tfTv1Od-QZ5*p+2ji!*~o1{R0(bP~lk`QVr z+=xO-3OAvUq2DULIYBokWaV*-DWv#TQ^@dbrho;p6zCRw;z{9FO$~+HG&K}%*VKAt zxO(G83(MyOVQ53Yb4LRCyF2lSPn2$ThD z^anI8P~dwLga!sd_iFJ0KuA2x=M<>>gvWg)Jnq*#1Z#LeHVYK^fdrv}LC^!52LSNE zGnU7L!s9`g#~lfNC_xw#!5SVI4GR?b;RK<9LD0jR2LSNEGnU6A!sC$=9*=4sf;Bwu zPUt2YDDYzmLIYDvk7*tNzyr@X9#ma~PTzFr5uGAm7i7q*{1h_aDVx^RQ;P((q|Z%; zypm^7CCz|RakWre<9UQu!<>TfDJQL2EofL}b-(8{0{e(UkjSm(8n8vC{KVKI*=5+K zg>4#ak*K%1li-u9I?*;NgV8vDs$VWLsQ|Z4Hu@HOS|;fi$;UL*akW?Doq_0IRQOcue!^Klf`XA!e#)_kv>XIsP^nls<)=DEn$w@bfTs8fa)xqESd zEt0O!P%DwtwNjtdwbE>vV6(a0x9DhRB)o zQd-K(kX6^&U|J>&`-NeD3`1(9g#QQ-P_J 
zv2#%iycEuCQl&V3mg9Wc?`2DQG-w_&GmX-t4M3VEgY6nSNMrM$AdN?y&pT3+pZ zs=T^+4Ykwq0hfj3YS zt(BfN(P`3~COTa@&_wG*mS#Fb>Y8c2)HRbYb_ zQPpuwHOZBfq)M<5e`!p$N=&s1>HzB2AK^Sv>ro10BLbO5U>{Kk++Ye0)x-IuW2$M_ zR5Pxr)>@l^gOXvdw+Q$cnSch1Xq0hIag#;h98e~u#Ufg%Ri>xSBHF24CaS|CI;m5p ztji+0saqzmhcZ+z{mM{-40MK?WS}$DB9l-nCx1A+qz#|7bW9a0AsuF7#=#nn#CkzROm&v2lcT7c!(7(Z7-%SAJ4XoYA- z4V@xdlctq2uxVN)b!l2Hb!j@)nC^_Cp-#W_CYWwAakyei9i9=GZ?%8SJBmfcF@1_} zt@s#1y>|Ff#Ep(@_&F{sA_{>SV_88JINh(siGHFia@Xrv4LR~y7)aQbVz5-PQM`<1 z()Tter$|&tVU%jK=}HZF21s|}egJxx%%(~gcz=*fF&LgChDQa%gL@2S*+i8#OjZyU zN_h|&IeI;Y0>dulFj&=+Lf8=_2kTDVl(FbyaamxA*$Pj*JSK{+Tv6_~nAw!YJXwUo?eOSR&=!W*+9Mib4-QSihpZ!?-^h-K?K zNwm)>U{aG!Q)RZH(nJC%qDK1OQNhZ!7SoPP@tlXp)(BF?@*VhEbbD?aQEO^5!M~iw6Iykgj|zM z)N3uN8s)$bF$SM{hq0Jpo(1`3mC`3>xL!*^vj7FYXtwYZh|!>2e~9N$HFi(EY<58r zjk&Ug(Xj@Lm#`QNoPaEwo2YZ4HxG>*?02wxJEpOkCI@NKEk z>Ip`F0%MA$G(12!x0o5}Z!t=5!r|RdfEwHYX^X2NrB@m71J%efG?j{b+bu@irchj; zPFUPfL{R||D;0X5AaxRYAL+@oHwneLrGx1(jbf&Q#QUT|?-P;N-bct!>wScLT|oYX zyw7p0U%U?n->SLpe=$l=5xk@1N%X4%FHA&|vL;UgXbtRMzmUohKh=yE{8YR6DaQ%G zLk0H~yR8!dE>BIy*O<*bprCiSvzQkrR;J6;$k_FemzBGVa5S^v3-j4ZuT&Kt52A8K zpbGGczL#wwyt#st5?|rN)8D++XiRUYF{0p(>dK1Q=&c^gJeXmrB;#hhqd1+KA{a}u z6HsZn=#`ea&QOhLoGdnFK_5C9PgXU6&A14c`XU(jOHY6cl224GTrfGpMUqGubvx&h zP;$8@F}4`=V3B|=c<9<%#I-dIb1Su2e-7VmU0WMGdEl1Xg`YtBK53~d!jyHuygI3g zyWk#i-_z3Q1QgKdu*E=>2xx<;M6lU5FG){JqF7d$E*sp$3+ml6{Nacm}>&(=u6f=X3PMq_Z=ktQkxt(%V+M zYXOP-t_?KWG>X#4cxCs^}@CuY_ayVY~hSG+ibwrG>T&@{`%Ps0RBU^ zE!LED)F?*jsL^PmhJICu&ym?}G*JVPL=F9F0KGbi8jX^6YKM3K>sGE&GznL(_&&6i z6Ir>6mx@hP=*IE+j%TC=+fdba@Cn`%lN-7rPfH(Z33e?=u+5b`BAZSc;g>J8pS!FC|P zK9PokC&Z7}44*4IL$jH3Y)EfN1L_j@YV}N;2qfGjB8BvEP-5vDEa+nsuGLL64!awL zok|uFCsFnbg+_@iW)<6Di$+7Cni7Mc$`bt}2NSMj5%ufP>+7R=D_!G6-`G&q?zro4 z>jC2o-F5laBcc&x1lr{ACo5`z4GskBpf>$u1{1_eJa$*!*@jS8=&=#R%r6{Dse@i{ z{DQ=Q*`Rw!$Rb2JvxDC{Sk0E%>2+Y49U`)`L(c3NEM|u#j0B;bh>zZYjf(7Z^bR$T zYxu~(&H@}QLF5Mt5uD^XzT&x}L9?;>=tJz9o2|!&L&x7R3}btdIBErfdPaH@;33Ka zaZDE-suzCUc>tyNnjn$+Q%s z(of4E=5lPNR?uQ(Ggly$xrkR)tfy6uqg)9nP{~tl(W>NXjbeNdr;?|JF620>xCWoD z1<$pPDgu|MwRzpcVCAqjRF<`(ENf%<;g=D;qKa$KHQ0es$J1yHpN_`sz+s)U&(;fw zb$LQv=oHt59M%bkbuk=(3Vu6LiJ^qM;PsdlAG5MPoRtl59~)^Ss74|9Obr`%Aqu+5 zfki>_y9tmp?rF2576uEgIEyynrv$-f`fT!f3!TkdX+3X4ueLdJwG~h>S6gh+&eb^@ z#rPm@uFj>+v>xu`yc3wS?I5uOymvTr23($YB6GUB6DmpJobu$Yfaaqm2I7?;qca2S`-MPLy*jOXJnbL1dAzPt!d@)d9xSBef@PFK)n zbTM5COB^V17+1ma*AF{BLm)?i@@cnkk>s7bg>X>OF^`y5W|lq=wi{u z33N>lZQw#CX+r6-pY zxwDc=98P1!Qy;8)8vbY3_b?lMT;#YhS*V9S6*pGrz+yI7m|L@{chuBip`y4qf~B-9 zd>ZWvK1ea^c`n;H;AfvE>x|X%FgIfS8y&@J0wiaYO<1MM64W%p07vW7V2#qm&0yO? 
zEhoknXT^ps^18!8A~@POV5hIy;yhWiHI3rfwlbDfW(j?qWH%0?i#86}0$>GQgu-4z z0o5$Y?soH2({?PhWantMTQF}IdF!O>e?2rt2HvsB-s@rS9ObjGtURo&%)+V{ zODim_Is@kON=V`48eNYTX$y+rBob#hj&|-7eBBOM2>4;F8U2~ z9cBjkRI9s^O%>g(4`#zO!fZsW0iRh+k35~XkA@EZxg_H%-o6;6y?v+o!vb(pa z;&+)jCNV z$%vF~pjLh2IQn9E5mQ!HVNBI>Vk!rQ(TVA7d@u(+n&T|PfXmYy+>P=>dj}M7!M+@S zKp>9GY#buaEzLf?JIJNB2Butt0dz0zmM>PKOBiOkB)&B*!EGUs} z?DPp%-f*1281=y%qliwMJ0uu|24@tI%hb-LF_@p}FPb3TiIWgXcM77b6(q zNP0`4z5i=}3&!3bnMQk~%U?QksugqElU>o?vU?>Fg}`V|fE(id9YYJH*AMK~8)T|&WyRu>gsXf46{@^^)bnCc6yE|2I!E3Q0a(Jwcj z|4-jV$8IfC)Fp+$f?r*kz=B-qaRN&;Ka4*+V^*yj#bh^Dtf!5Bv-<6O*aJG2oMhJhxZ~w$0fpYtFMc)E%dQs%BwEg$<|1aiq B*0KNq diff --git a/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm b/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm index b1a26f75e6b90a9bd26ab9ade9ca6c22708f4a85..64e5c9a79e954f01a2c4d73f2a32b9440113175c 100644 GIT binary patch delta 2511 zcma)7TZ>y&6yE3LBsrIy(qdEiqcoZPZhZWig_s8_V_b@@K*FrFB=YNp0%l64tvc+sU}u zoaYB&zEJEcb@%l4^$!fDF00j4gNbjLgMVuOnQ9dMU9+9^&DXH4DCdn{`M&;CW8I{9 zc)B(zW+!Xo(_(63bhb9T`_V@>j&6oM^YBz{eBa!}Y(lHtec#O9xpjI8dvbKBz+duC@I6VvgP$(3L9x@H_ce&~d(8U3k4rw$%ueMpk@ z{(IzQy+>X$viiViJaoru^T!X=7UoaQA3MzktpoFC=4+=9zjQE>vcLaIf&77@Lf02l z4Z%hjvI5<+Ikb{K(qW^WHglPw0zwLtAY6cfhLhCZ99= z(iXSLB3r&2KG=xI2qvQ?HOC>BXDNkSlShPh6u&ypQI30*`6*0XvjiztQRndGcF<@_OkwfF0S ztvv1cTH$Jptr=sRCO{-l29^6Qq-6?dHwt_M(^MF1rx2NjDXi#Z$rHi)`xtU3hx95l z$N@$+WjvqKh#<*Rl%y2aoo`=h@)=0(?g0AgIA{?jptR*eI4oau_sTP2S>AMS$a1zU zKh4fqV2ODFFOYnrP?no=Me7|^JHsKEPis=wn@AFTLPZ+jR^POb2MTo6jf&+N$YfsSN_sdPF5ng={biLHg2N@UV8GONtPhpvpV5^-L^ zxgnez64XAYs+xxCgtb_tu;SQu*?bp>7Z3>;cjicO|`n}+4r zfg`Wyd`Mq$mEo?cgwZTP7E0LJrm(Y+1*4o-xKr3)yxJ6GQQnT+m}*0Z8;jC%e=w{h zC@%zlj!fkSbpq)_l9BXpy3;a|4&`E4dcY@NRR<$aQQ;wfuY)^ZO`})%an8+6itvOF zAp-9GN`z>Nv7qA`UON@la44c0jy`B0XrmxD9A{(9RMu8-5q*5zm#eb8Qgao=K+Vce z6^i9WgjW=OZF(?Qr(!))d$WhYibE|bdWun~1>V)FnXVE+-3sWE!rPz{?*`D_E3X%} zTh@8ihYfu(BflG14-WqW+(T_}i79Y_f^if;)DafST-nX2MwkaaSMBwzgfjP_+bieB z*%RcAvnOL6$Fm4rVUmM zo2y`RTdJ0y4<4xel#E_Blo(HN1Kr*^&6 zw7WD(V52&tm+rx+*`U0H%;Y!kSv?-VfF7VSc|bl7)u}wPMf+<%c_FO0{{-z}oUK}c zz7}{fRlpN|D<$9J_v*i=<(K$eu;kff-gU0OiA*(CVa_#s#VS9quiPH0>2jj-!r$DW B*Af5# delta 2136 zcmZ`)U5Fc16ux&dnaod;xs#bm(#>}F+)cMlZ7XzJQ*8^~yV6Qgse-gmb`9HAJDbw# zRz#6atx5$cxLo|91(Cj33Jc02LLZ8c76psG_#o)3Alf${D%5i(v)QaD1ai;)IX~a` zoiiUl^TRQpbr#QX#u&fz=7kGveA(a4^UE_WZY_5wxUu~HLtlJ*nOh%Sz{9FOX;fqD z60LcUsoS1cxcfF5VoV=0NAxY@1mBA*Sp7S9loy_Vd48TX(xbxM*>kV4(QWCq>-jR@ ztUG2yU(c8HFL}4uS!83*sm}S%?BeOy=X2bB_a=qAN*Ni(lGRhJ#h7%IkS@5QFp}Cz zDBe|OH#HJrC?*Tih631ayzGIe7%sq7d_lUoXK#SAO1pL8r z7i0Ue20D>}K4(P_=76odAuGVaP#h|h-UyV`*G!qhCM0B*10v-8r3=Nm)owB1@r?{} zJA>Q;1XTw?bBB!?7Zg*F=D3S-inwtLli+}U>HZWF1%3R1x0wkpGU9+I0*vZ|dmNJo%mb7-y4G>HmjEO~ z0np0w0L2ptRLImgb+3SN7+cxL>M$-P`kFvr6AA{ts2yP<0X$Qn9|&7|C9F8NGVQwh zn{YDEsmJv9a;v^y@=Gq^g#sjgJnaV~xa&=o02!wlQ38WBT0+JdltoNRmw9PnV_Kxq zYng^(y$qA6p}y|dHIzzz6Gg6@^V`pw{p&N0Or&Dl=>!v&q_zIHrFA`YX3tgnvOB zL9NJeEmk|iLSuwCR-*%WaW8>5A(3Y7j#frhOjN~0RSvt2^k1t13X@Q*^*g;L>xg1~ z^>FQw5YJ@&l5}u+mr160BSLTWUbrVS)DLNEv{Ug zVNZiU!;WX#%yvKXaG;iDpiEi8De+T2o#^Q>(gz$@pPx9@{KHB&QO+_aO~`>s?EgkX z9BZ&}h0r;W?qAxFOZH9ju@qYdAY_FCh3E*$8aUER^+pqSFa~Z9mC}EX2tfpKU#? Tqt=ntN4L!yMrTo