diff --git a/include/Hacl_Bignum32.h b/include/Hacl_Bignum32.h index 84a839a9..709f22d9 100644 --- a/include/Hacl_Bignum32.h +++ b/include/Hacl_Bignum32.h @@ -56,9 +56,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the sum is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); @@ -67,82 +76,134 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the difference is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. 
May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. 
`uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. 
*/ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -157,22 +218,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. 
`uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -187,18 +256,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. 
+ + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); @@ -212,15 +286,16 @@ Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n); @@ -228,16 +303,18 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. 
- The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. */ void Hacl_Bignum32_mod_precomp( @@ -249,21 +326,25 @@ Hacl_Bignum32_mod_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. 
When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -277,21 +358,25 @@ Hacl_Bignum32_mod_exp_vartime_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. 
if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -305,14 +390,17 @@ Hacl_Bignum32_mod_exp_consttime_precomp( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -330,42 +418,48 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. 
+ + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b); /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b); /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res); /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. 
Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); @@ -378,14 +472,22 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b); /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a = b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b); diff --git a/include/Hacl_HMAC.h b/include/Hacl_HMAC.h index e1dc04f2..0f6a5c27 100644 --- a/include/Hacl_HMAC.h +++ b/include/Hacl_HMAC.h @@ -35,11 +35,28 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "Hacl_Streaming_Types.h" #include "Hacl_Krmllib.h" +#include "Hacl_Hash_SHA3.h" #include "Hacl_Hash_SHA2.h" #include "Hacl_Hash_Blake2s.h" #include "Hacl_Hash_Blake2b.h" +/** +Write the HMAC-MD5 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 16 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_md5( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + /** Write the HMAC-SHA-1 MAC of a message (`data`) by using a key (`key`) into `dst`. @@ -55,6 +72,21 @@ Hacl_HMAC_compute_sha1( uint32_t data_len ); +/** +Write the HMAC-SHA-2-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 28 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha2_224( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + /** Write the HMAC-SHA-2-256 MAC of a message (`data`) by using a key (`key`) into `dst`. @@ -100,6 +132,66 @@ Hacl_HMAC_compute_sha2_512( uint32_t data_len ); +/** +Write the HMAC-SHA-3-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 144 bytes. +`dst` must point to 28 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_224( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + +/** +Write the HMAC-SHA-3-256 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 136 bytes. +`dst` must point to 32 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_256( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + +/** +Write the HMAC-SHA-3-384 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 104 bytes. +`dst` must point to 48 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha3_384( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + +/** +Write the HMAC-SHA-3-512 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 72 bytes. +`dst` must point to 64 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_512( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + /** Write the HMAC-BLAKE2s MAC of a message (`data`) by using a key (`key`) into `dst`. diff --git a/include/Hacl_Hash_Blake2b.h b/include/Hacl_Hash_Blake2b.h index 3403fc83..8c3f4405 100644 --- a/include/Hacl_Hash_Blake2b.h +++ b/include/Hacl_Hash_Blake2b.h @@ -53,18 +53,31 @@ typedef struct Hacl_Hash_Blake2b_blake2_params_s } Hacl_Hash_Blake2b_blake2_params; -typedef struct K____uint64_t___uint64_t__s +typedef struct Hacl_Hash_Blake2b_index_s { - uint64_t *fst; - uint64_t *snd; + uint8_t key_length; + uint8_t digest_length; + bool last_node; } -K____uint64_t___uint64_t_; +Hacl_Hash_Blake2b_index; + +#define HACL_HASH_BLAKE2B_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_PERSONAL_BYTES (16U) typedef struct Hacl_Hash_Blake2b_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint64_t___uint64_t_ thd; + bool thd; + uint64_t *f3; + uint64_t *f4; } Hacl_Hash_Blake2b_block_state_t; @@ -92,7 +105,11 @@ The caller must satisfy the following requirements. 
*/ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** Specialized allocation function that picks default values for all @@ -116,7 +133,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -159,10 +176,14 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s); /** Free state function when there is no key @@ -198,10 +219,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. 
The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. */ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_Blake2b_Simd256.h b/include/Hacl_Hash_Blake2b_Simd256.h index af309dc8..446b1cd5 100644 --- a/include/Hacl_Hash_Blake2b_Simd256.h +++ b/include/Hacl_Hash_Blake2b_Simd256.h @@ -40,18 +40,23 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" -typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s -{ - Lib_IntVector_Intrinsics_vec256 *fst; - Lib_IntVector_Intrinsics_vec256 *snd; -} -K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_; +#define HACL_HASH_BLAKE2B_SIMD256_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_SIMD256_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_SIMD256_PERSONAL_BYTES (16U) typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ thd; + bool thd; + Lib_IntVector_Intrinsics_vec256 *f3; + Lib_IntVector_Intrinsics_vec256 *f4; } Hacl_Hash_Blake2b_Simd256_block_state_t; @@ -64,34 +69,54 @@ typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s Hacl_Hash_Blake2b_Simd256_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. 
Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void); /** - Re-initialization function. 
The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -101,21 +126,27 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/
 void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s);
 
 /**
-  Update function when there is no key; 0 = success, 1 = max length exceeded
+  Update function; 0 = success, 1 = max length exceeded
 */
 Hacl_Streaming_Types_error_code
 Hacl_Hash_Blake2b_Simd256_update(
@@ -125,10 +156,19 @@ Hacl_Hash_Blake2b_Simd256_update(
 );
 
 /**
-  Finish function when there is no key
+  Digest function. This function expects the `output` array to hold
+at least `digest_length` bytes, where `digest_length` was determined by your
+choice of `malloc` function. Concretely, if you used `malloc` or
+`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default
+digest length). If you used `malloc_with_params_and_key`, then the expected
+length is whatever you chose for the `digest_length` field of your parameters.
+For convenience, this function returns `digest_length`. When in doubt, callers
+can pass an array of size HACL_HASH_BLAKE2B_SIMD256_OUT_BYTES, then use the return value
+to see how many bytes were actually written.
 */
-void
-Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output);
+uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst);
+
+Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s);
 
 /**
   Free state function when there is no key
@@ -136,7 +176,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8
 void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state);
 
 /**
-  Copying. The key length (or absence thereof) must match between source and destination.
+  Copying. This preserves all parameters.
 */
 Hacl_Hash_Blake2b_Simd256_state_t
 *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state);
 
@@ -161,8 +201,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key(
   uint32_t key_len
 );
 
+/**
+Write the BLAKE2b digest of message `input` using key `key` and
+parameters `params` into `output`. 
The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_Blake2s.h b/include/Hacl_Hash_Blake2s.h index ac783473..bdf4a4b4 100644 --- a/include/Hacl_Hash_Blake2s.h +++ b/include/Hacl_Hash_Blake2s.h @@ -38,18 +38,23 @@ extern "C" { #include "Hacl_Streaming_Types.h" #include "Hacl_Hash_Blake2b.h" -typedef struct K____uint32_t___uint32_t__s -{ - uint32_t *fst; - uint32_t *snd; -} -K____uint32_t___uint32_t_; +#define HACL_HASH_BLAKE2S_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_PERSONAL_BYTES (8U) typedef struct Hacl_Hash_Blake2s_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint32_t___uint32_t_ thd; + bool thd; + uint32_t *f3; + uint32_t *f4; } Hacl_Hash_Blake2s_block_state_t; @@ -62,30 +67,53 @@ typedef struct Hacl_Hash_Blake2s_state_t_s Hacl_Hash_Blake2s_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. 
+- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. 
All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -95,28 +123,44 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. 
Concretely, if you used `malloc` or
+`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default
+digest length). If you used `malloc_with_params_and_key`, then the expected
+length is whatever you chose for the `digest_length` field of your parameters.
+For convenience, this function returns `digest_length`. When in doubt, callers
+can pass an array of size HACL_HASH_BLAKE2S_OUT_BYTES, then use the return value
+to see how many bytes were actually written.
 */
-void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output);
+uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst);
+
+Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s);
 
 /**
   Free state function when there is no key
@@ -124,7 +168,7 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output)
 void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state);
 
 /**
-  Copying. The key length (or absence thereof) must match between source and destination.
+  Copying. This preserves all parameters.
 */
 Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state);
 
@@ -148,8 +192,14 @@ Hacl_Hash_Blake2s_hash_with_key(
   uint32_t key_len
 );
 
+/**
+Write the BLAKE2s digest of message `input` using key `key` and
+parameters `params` into `output`. The `key` array must be of length
+`params.key_length`. The `output` array must be of length
+`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_Blake2s_Simd128.h b/include/Hacl_Hash_Blake2s_Simd128.h index d725ee86..f1e0b641 100644 --- a/include/Hacl_Hash_Blake2s_Simd128.h +++ b/include/Hacl_Hash_Blake2s_Simd128.h @@ -39,18 +39,23 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" -typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s -{ - Lib_IntVector_Intrinsics_vec128 *fst; - Lib_IntVector_Intrinsics_vec128 *snd; -} -K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_; +#define HACL_HASH_BLAKE2S_SIMD128_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_SIMD128_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_SIMD128_PERSONAL_BYTES (8U) typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ thd; + bool thd; + Lib_IntVector_Intrinsics_vec128 *f3; + Lib_IntVector_Intrinsics_vec128 *f4; } Hacl_Hash_Blake2s_Simd128_block_state_t; @@ -63,34 +68,54 @@ typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s Hacl_Hash_Blake2s_Simd128_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. 
In other words,
+once you commit to a digest and key length, the only way to change these
+parameters is to allocate a new object.
+
+The caller must satisfy the following requirements.
+- The length of the key k MUST match the value of the field key_length in the
+  parameters.
+- The key_length must not exceed 32 for S, 64 for B.
+- The digest_length must not exceed 32 for S, 64 for B.
+
 */
 Hacl_Hash_Blake2s_Simd128_state_t
 *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(
   Hacl_Hash_Blake2b_blake2_params *p,
+  bool last_node,
   uint8_t *k
 );
 
 /**
-  State allocation function when there is just a custom key. All
-other parameters are set to their respective default values, meaning the output
-length is the maximum allowed output (128 for S, 64 for B).
+  Specialized allocation function that picks default values for all
+parameters, except for the key_length. Further resettings of the state SHALL be
+done with `reset_with_key`, and SHALL feature the exact same key length `kk` as
+passed here. In other words, once you commit to a key length, the only way to
+change this parameter is to allocate a new object.
+
+The caller must satisfy the following requirements.
+- The key_length must not exceed 32 for S, 64 for B.
+
 */
 Hacl_Hash_Blake2s_Simd128_state_t
 *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk);
 
 /**
-  State allocation function when there is no key
+  Specialized allocation function that picks default values for all
+parameters, and has no key. Effectively, this is what you want if you intend to
+use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`.
 */
 Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void);
 
 /**
-  Re-initialization function. The reinitialization API is tricky --
-you MUST reuse the same original parameters for digest (output) length and key
-length.
+  General-purpose re-initialization function with parameters and
+key. 
You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -100,21 +125,27 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/
 void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s);
 
 /**
-  Update function when there is no key; 0 = success, 1 = max length exceeded
+  Update function; 0 = success, 1 = max length exceeded
 */
 Hacl_Streaming_Types_error_code
 Hacl_Hash_Blake2s_Simd128_update(
@@ -124,10 +155,19 @@ Hacl_Hash_Blake2s_Simd128_update(
 );
 
 /**
-  Finish function when there is no key
+  Digest function. This function expects the `output` array to hold
+at least `digest_length` bytes, where `digest_length` was determined by your
+choice of `malloc` function. Concretely, if you used `malloc` or
+`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default
+digest length). If you used `malloc_with_params_and_key`, then the expected
+length is whatever you chose for the `digest_length` field of your parameters.
+For convenience, this function returns `digest_length`. When in doubt, callers
+can pass an array of size HACL_HASH_BLAKE2S_SIMD128_OUT_BYTES, then use the return value
+to see how many bytes were actually written.
 */
-void
-Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output);
+uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst);
+
+Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s);
 
 /**
   Free state function when there is no key
@@ -135,7 +175,7 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8
 void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state);
 
 /**
-  Copying. The key length (or absence thereof) must match between source and destination.
+  Copying. This preserves all parameters.
 */
 Hacl_Hash_Blake2s_Simd128_state_t
 *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state);
 
@@ -160,8 +200,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key(
   uint32_t key_len
 );
 
+/**
+Write the BLAKE2s digest of message `input` using key `key` and
+parameters `params` into `output`. 
The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/Hacl_Hash_SHA3.h b/include/Hacl_Hash_SHA3.h index 8fb78fcd..18f23d8d 100644 --- a/include/Hacl_Hash_SHA3.h +++ b/include/Hacl_Hash_SHA3.h @@ -117,7 +117,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s); Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -131,14 +131,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. 
The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/include/Hacl_Hash_SHA3_Simd256.h b/include/Hacl_Hash_SHA3_Simd256.h index 617e8e34..72162d43 100644 --- a/include/Hacl_Hash_SHA3_Simd256.h +++ b/include/Hacl_Hash_SHA3_Simd256.h @@ -139,12 +139,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s); Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -161,15 +161,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. 
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -192,7 +192,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/include/internal/Hacl_Bignum_Base.h b/include/internal/Hacl_Bignum_Base.h index f2e282f4..4e0b35cb 100644 --- a/include/internal/Hacl_Bignum_Base.h +++ b/include/internal/Hacl_Bignum_Base.h @@ -72,9 +72,9 @@ Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t * memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res; os[i] = x; } } @@ -372,8 +372,8 @@ Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) memset(res, 0U, (aLen + aLen) * sizeof (uint32_t)); for (uint32_t i0 = 0U; i0 < aLen; i0++) { - uint32_t *ab = a; uint32_t a_j = a[i0]; + uint32_t *ab = a; uint32_t *res_j = res + i0; uint32_t c = 0U; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -400,7 +400,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) uint32_t r = c; res[i0 + i0] = r; } - uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, res, res, res); + 
KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t a_copy0[aLen + aLen]; + memset(a_copy0, 0U, (aLen + aLen) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t b_copy0[aLen + aLen]; + memset(b_copy0, 0U, (aLen + aLen) * sizeof (uint32_t)); + memcpy(a_copy0, res, (aLen + aLen) * sizeof (uint32_t)); + memcpy(b_copy0, res, (aLen + aLen) * sizeof (uint32_t)); + uint32_t r = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, a_copy0, b_copy0, res); + uint32_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); uint32_t tmp[aLen + aLen]; @@ -413,7 +422,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res) tmp[2U * i] = lo; tmp[2U * i + 1U] = hi; } - uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, res, tmp, res); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t a_copy[aLen + aLen]; + memset(a_copy, 0U, (aLen + aLen) * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen + aLen); + uint32_t b_copy[aLen + aLen]; + memset(b_copy, 0U, (aLen + aLen) * sizeof (uint32_t)); + memcpy(a_copy, res, (aLen + aLen) * sizeof (uint32_t)); + memcpy(b_copy, tmp, (aLen + aLen) * sizeof (uint32_t)); + uint32_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, a_copy, b_copy, res); + uint32_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -423,8 +441,8 @@ Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) memset(res, 0U, (aLen + aLen) * sizeof (uint64_t)); for (uint32_t i0 = 0U; i0 < aLen; i0++) { - uint64_t *ab = a; uint64_t a_j = a[i0]; + uint64_t *ab = a; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -451,7 +469,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) uint64_t r = c; res[i0 + i0] = r; } - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + 
uint64_t a_copy0[aLen + aLen]; + memset(a_copy0, 0U, (aLen + aLen) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t b_copy0[aLen + aLen]; + memset(b_copy0, 0U, (aLen + aLen) * sizeof (uint64_t)); + memcpy(a_copy0, res, (aLen + aLen) * sizeof (uint64_t)); + memcpy(b_copy0, res, (aLen + aLen) * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); uint64_t tmp[aLen + aLen]; @@ -464,7 +491,16 @@ Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res) tmp[2U * i] = lo; tmp[2U * i + 1U] = hi; } - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, res, tmp, res); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t a_copy[aLen + aLen]; + memset(a_copy, 0U, (aLen + aLen) * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen + aLen); + uint64_t b_copy[aLen + aLen]; + memset(b_copy, 0U, (aLen + aLen) * sizeof (uint64_t)); + memcpy(a_copy, res, (aLen + aLen) * sizeof (uint64_t)); + memcpy(b_copy, tmp, (aLen + aLen) * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } diff --git a/include/internal/Hacl_Bignum_K256.h b/include/internal/Hacl_Bignum_K256.h index fe72fffe..33d77791 100644 --- a/include/internal/Hacl_Bignum_K256.h +++ b/include/internal/Hacl_Bignum_K256.h @@ -70,11 +70,7 @@ static inline bool Hacl_K256_Field_is_felem_lt_prime_minus_order_vartime(uint64_ uint64_t f2 = f[2U]; uint64_t f3 = f[3U]; uint64_t f4 = f[4U]; - if (f4 > 0ULL) - { - return false; - } - if (f3 > 0ULL) + if (f4 > 0ULL || f3 > 0ULL) { return false; } @@ -104,11 +100,11 @@ static inline void Hacl_K256_Field_load_felem(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = tmp; uint8_t *bj = b + i * 8U; uint64_t u = load64_be(bj); uint64_t r = u; 
uint64_t x = r; + uint64_t *os = tmp; os[i] = x;); uint64_t s0 = tmp[3U]; uint64_t s1 = tmp[2U]; @@ -589,7 +585,9 @@ static inline void Hacl_K256_Field_fnegate_conditional_vartime(uint64_t *f, bool f[2U] = f2; f[3U] = f3; f[4U] = f4; - Hacl_K256_Field_fnormalize(f, f); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, f, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(f, f_copy); return; } } @@ -598,7 +596,9 @@ static inline void Hacl_Impl_K256_Finv_fsquare_times_in_place(uint64_t *out, uin { for (uint32_t i = 0U; i < b; i++) { - Hacl_K256_Field_fsqr(out, out); + uint64_t x_copy[5U] = { 0U }; + memcpy(x_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsqr(out, x_copy); } } @@ -607,7 +607,9 @@ static inline void Hacl_Impl_K256_Finv_fsquare_times(uint64_t *out, uint64_t *a, memcpy(out, a, 5U * sizeof (uint64_t)); for (uint32_t i = 0U; i < b; i++) { - Hacl_K256_Field_fsqr(out, out); + uint64_t x_copy[5U] = { 0U }; + memcpy(x_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsqr(out, x_copy); } } @@ -618,29 +620,53 @@ static inline void Hacl_Impl_K256_Finv_fexp_223_23(uint64_t *out, uint64_t *x2, uint64_t x44[5U] = { 0U }; uint64_t x88[5U] = { 0U }; Hacl_Impl_K256_Finv_fsquare_times(x2, f, 1U); - Hacl_K256_Field_fmul(x2, x2, f); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, x2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x2, f1_copy, f); Hacl_Impl_K256_Finv_fsquare_times(x3, x2, 1U); - Hacl_K256_Field_fmul(x3, x3, f); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy0, f); Hacl_Impl_K256_Finv_fsquare_times(out, x3, 3U); - Hacl_K256_Field_fmul(out, out, x3); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy1, x3); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 3U); - Hacl_K256_Field_fmul(out, out, x3); + uint64_t f1_copy2[5U] = { 0U }; + memcpy(f1_copy2, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, 
f1_copy2, x3); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 2U); - Hacl_K256_Field_fmul(out, out, x2); + uint64_t f1_copy3[5U] = { 0U }; + memcpy(f1_copy3, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy3, x2); Hacl_Impl_K256_Finv_fsquare_times(x22, out, 11U); - Hacl_K256_Field_fmul(x22, x22, out); + uint64_t f1_copy4[5U] = { 0U }; + memcpy(f1_copy4, x22, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x22, f1_copy4, out); Hacl_Impl_K256_Finv_fsquare_times(x44, x22, 22U); - Hacl_K256_Field_fmul(x44, x44, x22); + uint64_t f1_copy5[5U] = { 0U }; + memcpy(f1_copy5, x44, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x44, f1_copy5, x22); Hacl_Impl_K256_Finv_fsquare_times(x88, x44, 44U); - Hacl_K256_Field_fmul(x88, x88, x44); + uint64_t f1_copy6[5U] = { 0U }; + memcpy(f1_copy6, x88, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x88, f1_copy6, x44); Hacl_Impl_K256_Finv_fsquare_times(out, x88, 88U); - Hacl_K256_Field_fmul(out, out, x88); + uint64_t f1_copy7[5U] = { 0U }; + memcpy(f1_copy7, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy7, x88); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 44U); - Hacl_K256_Field_fmul(out, out, x44); + uint64_t f1_copy8[5U] = { 0U }; + memcpy(f1_copy8, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy8, x44); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 3U); - Hacl_K256_Field_fmul(out, out, x3); + uint64_t f1_copy9[5U] = { 0U }; + memcpy(f1_copy9, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy9, x3); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 23U); - Hacl_K256_Field_fmul(out, out, x22); + uint64_t f1_copy10[5U] = { 0U }; + memcpy(f1_copy10, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy10, x22); } static inline void Hacl_Impl_K256_Finv_finv(uint64_t *out, uint64_t *f) @@ -648,11 +674,17 @@ static inline void Hacl_Impl_K256_Finv_finv(uint64_t *out, uint64_t *f) uint64_t x2[5U] = { 0U }; Hacl_Impl_K256_Finv_fexp_223_23(out, x2, f); 
Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 5U); - Hacl_K256_Field_fmul(out, out, f); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy, f); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 3U); - Hacl_K256_Field_fmul(out, out, x2); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy0, x2); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 2U); - Hacl_K256_Field_fmul(out, out, f); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy1, f); } static inline void Hacl_Impl_K256_Finv_fsqrt(uint64_t *out, uint64_t *f) @@ -660,7 +692,9 @@ static inline void Hacl_Impl_K256_Finv_fsqrt(uint64_t *out, uint64_t *f) uint64_t x2[5U] = { 0U }; Hacl_Impl_K256_Finv_fexp_223_23(out, x2, f); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 6U); - Hacl_K256_Field_fmul(out, out, x2); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, out, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(out, f1_copy, x2); Hacl_Impl_K256_Finv_fsquare_times_in_place(out, 2U); } diff --git a/include/internal/Hacl_Frodo_KEM.h b/include/internal/Hacl_Frodo_KEM.h index 34b1816a..78593991 100644 --- a/include/internal/Hacl_Frodo_KEM.h +++ b/include/internal/Hacl_Frodo_KEM.h @@ -182,9 +182,9 @@ Hacl_Impl_Matrix_matrix_from_lbytes(uint32_t n1, uint32_t n2, uint8_t *b, uint16 { for (uint32_t i = 0U; i < n1 * n2; i++) { - uint16_t *os = res; uint16_t u = load16_le(b + 2U * i); uint16_t x = u; + uint16_t *os = res; os[i] = x; } } diff --git a/include/internal/Hacl_HMAC.h b/include/internal/Hacl_HMAC.h index ad344c4c..a9719654 100644 --- a/include/internal/Hacl_HMAC.h +++ b/include/internal/Hacl_HMAC.h @@ -36,8 +36,10 @@ extern "C" { #include "krml/internal/target.h" #include "internal/Hacl_Krmllib.h" +#include "internal/Hacl_Hash_SHA3.h" #include "internal/Hacl_Hash_SHA2.h" #include "internal/Hacl_Hash_SHA1.h" 
+#include "internal/Hacl_Hash_MD5.h" #include "internal/Hacl_Hash_Blake2s.h" #include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_HMAC.h" diff --git a/include/internal/Hacl_Hash_Blake2b.h b/include/internal/Hacl_Hash_Blake2b.h index 6928d205..2dad4b01 100644 --- a/include/internal/Hacl_Hash_Blake2b.h +++ b/include/internal/Hacl_Hash_Blake2b.h @@ -38,12 +38,12 @@ extern "C" { #include "internal/Hacl_Impl_Blake2_Constants.h" #include "../Hacl_Hash_Blake2b.h" -typedef struct Hacl_Hash_Blake2b_index_s +typedef struct Hacl_Hash_Blake2b_params_and_key_s { - uint8_t key_length; - uint8_t digest_length; + Hacl_Hash_Blake2b_blake2_params *fst; + uint8_t *snd; } -Hacl_Hash_Blake2b_index; +Hacl_Hash_Blake2b_params_and_key; void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn); @@ -62,6 +62,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -69,13 +70,6 @@ Hacl_Hash_Blake2b_update_last( void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash); -typedef struct K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t__s -{ - Hacl_Hash_Blake2b_blake2_params *fst; - uint8_t *snd; -} -K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_; - #if defined(__cplusplus) } #endif diff --git a/include/internal/Hacl_Hash_Blake2b_Simd256.h b/include/internal/Hacl_Hash_Blake2b_Simd256.h index 4dd986b2..04b091fc 100644 --- a/include/internal/Hacl_Hash_Blake2b_Simd256.h +++ b/include/internal/Hacl_Hash_Blake2b_Simd256.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d diff --git a/include/internal/Hacl_Hash_Blake2s.h b/include/internal/Hacl_Hash_Blake2s.h index eccd92de..279c472e 100644 --- a/include/internal/Hacl_Hash_Blake2s.h +++ b/include/internal/Hacl_Hash_Blake2s.h @@ -56,6 +56,7 @@ 
Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/include/internal/Hacl_Hash_Blake2s_Simd128.h b/include/internal/Hacl_Hash_Blake2s_Simd128.h index 2c422949..77505dc2 100644 --- a/include/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/include/internal/Hacl_Hash_Blake2s_Simd128.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/include/internal/Hacl_Hash_SHA2.h b/include/internal/Hacl_Hash_SHA2.h index 7dade3f3..d61ef455 100644 --- a/include/internal/Hacl_Hash_SHA2.h +++ b/include/internal/Hacl_Hash_SHA2.h @@ -123,6 +123,8 @@ void Hacl_Hash_SHA2_sha256_finish(uint32_t *st, uint8_t *h); void Hacl_Hash_SHA2_sha224_init(uint32_t *hash); +void Hacl_Hash_SHA2_sha224_update_nblocks(uint32_t len, uint8_t *b, uint32_t *st); + void Hacl_Hash_SHA2_sha224_update_last(uint64_t totlen, uint32_t len, uint8_t *b, uint32_t *st); diff --git a/include/libintvector.h b/include/libintvector.h index 99d11336..11e914f7 100644 --- a/include/libintvector.h +++ b/include/libintvector.h @@ -19,7 +19,7 @@ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x) & 1) -#if defined(__x86_64__) || defined(_M_X64) +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) #if defined(HACL_CAN_COMPILE_VEC128) diff --git a/include/msvc/Hacl_Bignum32.h b/include/msvc/Hacl_Bignum32.h index 84a839a9..709f22d9 100644 --- a/include/msvc/Hacl_Bignum32.h +++ b/include/msvc/Hacl_Bignum32.h @@ -56,9 +56,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. 
+ @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); @@ -67,82 +76,134 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res); /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. 
+ @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. 
When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -157,22 +218,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. 
+ A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -187,18 +256,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise.
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res); @@ -212,15 +286,16 @@ Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n); @@ -228,16 +303,18 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k); /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. 
+ @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. */ void Hacl_Bignum32_mod_precomp( @@ -249,21 +326,25 @@ Hacl_Bignum32_mod_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. 
+ @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -277,21 +358,25 @@ Hacl_Bignum32_mod_exp_vartime_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -305,14 +390,17 @@ Hacl_Bignum32_mod_exp_consttime_precomp( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -330,42 +418,48 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. 
Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b); /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b); /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res); /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. 
+ @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); @@ -378,14 +472,22 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res); /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b); /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a = b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b); diff --git a/include/msvc/Hacl_HMAC.h b/include/msvc/Hacl_HMAC.h index e1dc04f2..0f6a5c27 100644 --- a/include/msvc/Hacl_HMAC.h +++ b/include/msvc/Hacl_HMAC.h @@ -35,11 +35,28 @@ extern "C" { #include "krml/lowstar_endianness.h" #include "krml/internal/target.h" +#include "Hacl_Streaming_Types.h" #include "Hacl_Krmllib.h" +#include "Hacl_Hash_SHA3.h" #include "Hacl_Hash_SHA2.h" #include "Hacl_Hash_Blake2s.h" #include "Hacl_Hash_Blake2b.h" +/** +Write the HMAC-MD5 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 16 bytes of memory.
+*/ +void +Hacl_HMAC_compute_md5( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + /** Write the HMAC-SHA-1 MAC of a message (`data`) by using a key (`key`) into `dst`. @@ -55,6 +72,21 @@ Hacl_HMAC_compute_sha1( uint32_t data_len ); +/** +Write the HMAC-SHA-2-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 28 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha2_224( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + /** Write the HMAC-SHA-2-256 MAC of a message (`data`) by using a key (`key`) into `dst`. @@ -100,6 +132,66 @@ Hacl_HMAC_compute_sha2_512( uint32_t data_len ); +/** +Write the HMAC-SHA-3-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 144 bytes. +`dst` must point to 28 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_224( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + +/** +Write the HMAC-SHA-3-256 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 136 bytes. +`dst` must point to 32 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_256( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + +/** +Write the HMAC-SHA-3-384 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 104 bytes. +`dst` must point to 48 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha3_384( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + +/** +Write the HMAC-SHA-3-512 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 72 bytes. +`dst` must point to 64 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_512( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +); + /** Write the HMAC-BLAKE2s MAC of a message (`data`) by using a key (`key`) into `dst`. diff --git a/include/msvc/Hacl_Hash_Blake2b.h b/include/msvc/Hacl_Hash_Blake2b.h index 3403fc83..fcc2d5df 100644 --- a/include/msvc/Hacl_Hash_Blake2b.h +++ b/include/msvc/Hacl_Hash_Blake2b.h @@ -53,6 +53,24 @@ typedef struct Hacl_Hash_Blake2b_blake2_params_s } Hacl_Hash_Blake2b_blake2_params; +typedef struct Hacl_Hash_Blake2b_index_s +{ + uint8_t key_length; + uint8_t digest_length; + bool last_node; +} +Hacl_Hash_Blake2b_index; + +#define HACL_HASH_BLAKE2B_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_PERSONAL_BYTES (16U) + typedef struct K____uint64_t___uint64_t__s { uint64_t *fst; @@ -64,7 +82,8 @@ typedef struct Hacl_Hash_Blake2b_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint64_t___uint64_t_ thd; + bool thd; + K____uint64_t___uint64_t_ f3; } Hacl_Hash_Blake2b_block_state_t; @@ -92,7 +111,11 @@ The caller must satisfy the following requirements. 
*/ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** Specialized allocation function that picks default values for all @@ -116,7 +139,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -159,10 +182,14 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s); /** Free state function when there is no key @@ -198,10 +225,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. 
The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. */ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_Blake2b_Simd256.h b/include/msvc/Hacl_Hash_Blake2b_Simd256.h index af309dc8..f1799e25 100644 --- a/include/msvc/Hacl_Hash_Blake2b_Simd256.h +++ b/include/msvc/Hacl_Hash_Blake2b_Simd256.h @@ -40,6 +40,16 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" +#define HACL_HASH_BLAKE2B_SIMD256_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_SIMD256_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_SIMD256_PERSONAL_BYTES (16U) + typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s { Lib_IntVector_Intrinsics_vec256 *fst; @@ -51,7 +61,8 @@ typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ thd; + bool thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ f3; } Hacl_Hash_Blake2b_Simd256_block_state_t; @@ -64,34 +75,54 @@ typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s Hacl_Hash_Blake2b_Simd256_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. 
Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void); /** - Re-initialization function. 
The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -101,21 +132,27 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -125,10 +162,19 @@ Hacl_Hash_Blake2b_Simd256_update( ); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s); /** Free state function when there is no key @@ -136,7 +182,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state); /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state); @@ -161,8 +207,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. 
The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_Blake2s.h b/include/msvc/Hacl_Hash_Blake2s.h index ac783473..870f1edc 100644 --- a/include/msvc/Hacl_Hash_Blake2s.h +++ b/include/msvc/Hacl_Hash_Blake2s.h @@ -38,6 +38,16 @@ extern "C" { #include "Hacl_Streaming_Types.h" #include "Hacl_Hash_Blake2b.h" +#define HACL_HASH_BLAKE2S_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_PERSONAL_BYTES (8U) + typedef struct K____uint32_t___uint32_t__s { uint32_t *fst; @@ -49,7 +59,8 @@ typedef struct Hacl_Hash_Blake2s_block_state_t_s { uint8_t fst; uint8_t snd; - K____uint32_t___uint32_t_ thd; + bool thd; + K____uint32_t___uint32_t_ f3; } Hacl_Hash_Blake2s_block_state_t; @@ -62,30 +73,53 @@ typedef struct Hacl_Hash_Blake2s_state_t_s Hacl_Hash_Blake2s_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. 
+- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k); +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. 
All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -95,28 +129,44 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. 
Concretely, if you used `malloc` or
+`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default
+digest length). If you used `malloc_with_params_and_key`, then the expected
+length is whatever you chose for the `digest_length` field of your parameters.
+For convenience, this function returns `digest_length`. When in doubt, callers
+can pass an array of size HACL_HASH_BLAKE2S_OUT_BYTES, then use the return value
+to see how many bytes were actually written.
 */
-void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output);
+uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst);
+
+Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s);
 
 /**
 Free state function when there is no key
@@ -124,7 +174,7 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output)
 void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state);
 
 /**
- Copying. The key length (or absence thereof) must match between source and destination.
+ Copying. This preserves all parameters.
 */
 Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state);
 
@@ -148,8 +198,14 @@ Hacl_Hash_Blake2s_hash_with_key(
   uint32_t key_len
 );
 
+/**
+Write the BLAKE2s digest of message `input` using key `key` and
+parameters `params` into `output`. The `key` array must be of length
+`params.key_length`. The `output` array must be of length
+`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_Blake2s_Simd128.h b/include/msvc/Hacl_Hash_Blake2s_Simd128.h index d725ee86..2bae1c8e 100644 --- a/include/msvc/Hacl_Hash_Blake2s_Simd128.h +++ b/include/msvc/Hacl_Hash_Blake2s_Simd128.h @@ -39,6 +39,16 @@ extern "C" { #include "Hacl_Hash_Blake2b.h" #include "libintvector.h" +#define HACL_HASH_BLAKE2S_SIMD128_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_SIMD128_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_SIMD128_PERSONAL_BYTES (8U) + typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s { Lib_IntVector_Intrinsics_vec128 *fst; @@ -50,7 +60,8 @@ typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s { uint8_t fst; uint8_t snd; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ thd; + bool thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ f3; } Hacl_Hash_Blake2s_Simd128_block_state_t; @@ -63,34 +74,54 @@ typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s Hacl_Hash_Blake2s_Simd128_state_t; /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. 
In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ); /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk); /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void); /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. 
You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -100,21 +131,27 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( ); /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k); /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s); /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -124,10 +161,19 @@ Hacl_Hash_Blake2s_Simd128_update( ); /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output); +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s); /** Free state function when there is no key @@ -135,7 +181,7 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state); /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state); @@ -160,8 +206,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( uint32_t key_len ); +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. 
The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/include/msvc/Hacl_Hash_SHA3.h b/include/msvc/Hacl_Hash_SHA3.h index 8fb78fcd..18f23d8d 100644 --- a/include/msvc/Hacl_Hash_SHA3.h +++ b/include/msvc/Hacl_Hash_SHA3.h @@ -117,7 +117,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s); Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -131,14 +131,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. 
The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/include/msvc/Hacl_Hash_SHA3_Simd256.h b/include/msvc/Hacl_Hash_SHA3_Simd256.h index 617e8e34..72162d43 100644 --- a/include/msvc/Hacl_Hash_SHA3_Simd256.h +++ b/include/msvc/Hacl_Hash_SHA3_Simd256.h @@ -139,12 +139,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s); Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -161,15 +161,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. 
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -192,7 +192,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/include/msvc/internal/Hacl_Bignum_K256.h b/include/msvc/internal/Hacl_Bignum_K256.h index fe72fffe..d8212bab 100644 --- a/include/msvc/internal/Hacl_Bignum_K256.h +++ b/include/msvc/internal/Hacl_Bignum_K256.h @@ -70,11 +70,7 @@ static inline bool Hacl_K256_Field_is_felem_lt_prime_minus_order_vartime(uint64_ uint64_t f2 = f[2U]; uint64_t f3 = f[3U]; uint64_t f4 = f[4U]; - if (f4 > 0ULL) - { - return false; - } - if (f3 > 0ULL) + if (f4 > 0ULL || f3 > 0ULL) { return false; } diff --git a/include/msvc/internal/Hacl_HMAC.h b/include/msvc/internal/Hacl_HMAC.h index ad344c4c..a9719654 100644 --- a/include/msvc/internal/Hacl_HMAC.h +++ b/include/msvc/internal/Hacl_HMAC.h @@ -36,8 +36,10 @@ extern "C" { #include "krml/internal/target.h" #include "internal/Hacl_Krmllib.h" +#include "internal/Hacl_Hash_SHA3.h" #include "internal/Hacl_Hash_SHA2.h" #include "internal/Hacl_Hash_SHA1.h" +#include "internal/Hacl_Hash_MD5.h" #include "internal/Hacl_Hash_Blake2s.h" #include "internal/Hacl_Hash_Blake2b.h" #include "../Hacl_HMAC.h" diff --git a/include/msvc/internal/Hacl_Hash_Blake2b.h 
b/include/msvc/internal/Hacl_Hash_Blake2b.h index 6928d205..2dad4b01 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2b.h +++ b/include/msvc/internal/Hacl_Hash_Blake2b.h @@ -38,12 +38,12 @@ extern "C" { #include "internal/Hacl_Impl_Blake2_Constants.h" #include "../Hacl_Hash_Blake2b.h" -typedef struct Hacl_Hash_Blake2b_index_s +typedef struct Hacl_Hash_Blake2b_params_and_key_s { - uint8_t key_length; - uint8_t digest_length; + Hacl_Hash_Blake2b_blake2_params *fst; + uint8_t *snd; } -Hacl_Hash_Blake2b_index; +Hacl_Hash_Blake2b_params_and_key; void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn); @@ -62,6 +62,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -69,13 +70,6 @@ Hacl_Hash_Blake2b_update_last( void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash); -typedef struct K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t__s -{ - Hacl_Hash_Blake2b_blake2_params *fst; - uint8_t *snd; -} -K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_; - #if defined(__cplusplus) } #endif diff --git a/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h b/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h index 4dd986b2..04b091fc 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h +++ b/include/msvc/internal/Hacl_Hash_Blake2b_Simd256.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d diff --git a/include/msvc/internal/Hacl_Hash_Blake2s.h b/include/msvc/internal/Hacl_Hash_Blake2s.h index eccd92de..279c472e 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2s.h +++ b/include/msvc/internal/Hacl_Hash_Blake2s.h @@ -56,6 +56,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git 
a/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h b/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h index 2c422949..77505dc2 100644 --- a/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/include/msvc/internal/Hacl_Hash_Blake2s_Simd128.h @@ -58,6 +58,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d diff --git a/include/msvc/internal/Hacl_Hash_SHA2.h b/include/msvc/internal/Hacl_Hash_SHA2.h index 7dade3f3..d61ef455 100644 --- a/include/msvc/internal/Hacl_Hash_SHA2.h +++ b/include/msvc/internal/Hacl_Hash_SHA2.h @@ -123,6 +123,8 @@ void Hacl_Hash_SHA2_sha256_finish(uint32_t *st, uint8_t *h); void Hacl_Hash_SHA2_sha224_init(uint32_t *hash); +void Hacl_Hash_SHA2_sha224_update_nblocks(uint32_t len, uint8_t *b, uint32_t *st); + void Hacl_Hash_SHA2_sha224_update_last(uint64_t totlen, uint32_t len, uint8_t *b, uint32_t *st); diff --git a/include/msvc/libintvector.h b/include/msvc/libintvector.h index 99d11336..11e914f7 100644 --- a/include/msvc/libintvector.h +++ b/include/msvc/libintvector.h @@ -19,7 +19,7 @@ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x) & 1) -#if defined(__x86_64__) || defined(_M_X64) +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) #if defined(HACL_CAN_COMPILE_VEC128) diff --git a/karamel/include/krml/c_endianness.h b/karamel/include/krml/c_endianness.h index 21d7e1b4..937d8d10 100644 --- a/karamel/include/krml/c_endianness.h +++ b/karamel/include/krml/c_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. 
*/ #ifndef __KRML_ENDIAN_H #define __KRML_ENDIAN_H diff --git a/karamel/include/krml/internal/builtin.h b/karamel/include/krml/internal/builtin.h index 6098f30b..bb47d64d 100644 --- a/karamel/include/krml/internal/builtin.h +++ b/karamel/include/krml/internal/builtin.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __KRML_BUILTIN_H #define __KRML_BUILTIN_H diff --git a/karamel/include/krml/internal/callconv.h b/karamel/include/krml/internal/callconv.h index aeca0ba7..4bc0f878 100644 --- a/karamel/include/krml/internal/callconv.h +++ b/karamel/include/krml/internal/callconv.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __KRML_CALLCONV_H #define __KRML_CALLCONV_H diff --git a/karamel/include/krml/internal/compat.h b/karamel/include/krml/internal/compat.h index b557bbc1..f206520f 100644 --- a/karamel/include/krml/internal/compat.h +++ b/karamel/include/krml/internal/compat.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef KRML_COMPAT_H #define KRML_COMPAT_H diff --git a/karamel/include/krml/internal/debug.h b/karamel/include/krml/internal/debug.h index 786db147..97f06995 100644 --- a/karamel/include/krml/internal/debug.h +++ b/karamel/include/krml/internal/debug.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. 
*/ #ifndef __KRML_DEBUG_H #define __KRML_DEBUG_H diff --git a/karamel/include/krml/internal/target.h b/karamel/include/krml/internal/target.h index d4252a10..425ed282 100644 --- a/karamel/include/krml/internal/target.h +++ b/karamel/include/krml/internal/target.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __KRML_TARGET_H #define __KRML_TARGET_H @@ -69,11 +69,21 @@ # endif #endif +#ifndef KRML_ATTRIBUTE_TARGET +# if defined(__GNUC__) +# define KRML_ATTRIBUTE_TARGET(x) __attribute__((target(x))) +# else +# define KRML_ATTRIBUTE_TARGET(x) +# endif +#endif + #ifndef KRML_NOINLINE # if defined(_MSC_VER) # define KRML_NOINLINE __declspec(noinline) # elif defined (__GNUC__) # define KRML_NOINLINE __attribute__((noinline,unused)) +# elif defined (__SUNPRO_C) +# define KRML_NOINLINE __attribute__((noinline)) # else # define KRML_NOINLINE # warning "The KRML_NOINLINE macro is not defined for this toolchain!" @@ -82,6 +92,20 @@ # endif #endif +#ifndef KRML_MUSTINLINE +# if defined(_MSC_VER) +# define KRML_MUSTINLINE inline __forceinline +# elif defined (__GNUC__) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) +# elif defined (__SUNPRO_C) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) +# else +# define KRML_MUSTINLINE inline +# warning "The KRML_MUSTINLINE macro defaults to plain inline for this toolchain!" +# warning "Please locate target.h and try to fill it out with a suitable definition for this compiler." +# endif +#endif + #ifndef KRML_PRE_ALIGN # ifdef _MSC_VER # define KRML_PRE_ALIGN(X) __declspec(align(X)) @@ -191,6 +215,8 @@ inline static int32_t krml_time(void) { #elif defined(__GNUC__) /* deprecated attribute is not defined in GCC < 4.5. 
*/ # define KRML_DEPRECATED(x) +#elif defined(__SUNPRO_C) +# define KRML_DEPRECATED(x) __attribute__((deprecated(x))) #elif defined(_MSC_VER) # define KRML_DEPRECATED(x) __declspec(deprecated(x)) #endif diff --git a/karamel/include/krml/internal/types.h b/karamel/include/krml/internal/types.h index e41b39be..31476313 100644 --- a/karamel/include/krml/internal/types.h +++ b/karamel/include/krml/internal/types.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef KRML_TYPES_H #define KRML_TYPES_H diff --git a/karamel/include/krml/internal/wasmsupport.h b/karamel/include/krml/internal/wasmsupport.h index b44fa3f7..5aba9756 100644 --- a/karamel/include/krml/internal/wasmsupport.h +++ b/karamel/include/krml/internal/wasmsupport.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ /* This file is automatically included when compiling with -wasm -d force-c */ #define WasmSupport_check_buffer_size(X) diff --git a/karamel/include/krml/lowstar_endianness.h b/karamel/include/krml/lowstar_endianness.h index 1aa2ccd6..af6b882c 100644 --- a/karamel/include/krml/lowstar_endianness.h +++ b/karamel/include/krml/lowstar_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __LOWSTAR_ENDIANNESS_H #define __LOWSTAR_ENDIANNESS_H diff --git a/karamel/krmllib/dist/minimal/FStar_UInt128.h b/karamel/krmllib/dist/minimal/FStar_UInt128.h index ecc90213..be32ad9b 100644 --- a/karamel/krmllib/dist/minimal/FStar_UInt128.h +++ b/karamel/krmllib/dist/minimal/FStar_UInt128.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. 
- Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h b/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h index 9e4e2290..d4a90220 100644 --- a/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h +++ b/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h b/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h index 56a2454f..39ac471f 100644 --- a/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h +++ b/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/karamel/krmllib/dist/minimal/LowStar_Endianness.h b/karamel/krmllib/dist/minimal/LowStar_Endianness.h index e851c15c..f95743d4 100644 --- a/karamel/krmllib/dist/minimal/LowStar_Endianness.h +++ b/karamel/krmllib/dist/minimal/LowStar_Endianness.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h b/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h index ae109004..10a4dc1a 100644 --- a/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h +++ b/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. 
*/ /******************************************************************************/ /* Machine integers (128-bit arithmetic) */ diff --git a/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h b/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h index 6ff658f5..89bbc159 100644 --- a/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h +++ b/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ /* This file was generated by KaRaMeL * then hand-edited to use MSVC intrinsics KaRaMeL invocation: diff --git a/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h b/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h index e2b6d628..bb736add 100644 --- a/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h +++ b/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. 
*/ #ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H #define FSTAR_UINT128_STRUCT_ENDIANNESS_H diff --git a/ocaml/ctypes.depend b/ocaml/ctypes.depend index d94fad90..6007465b 100644 --- a/ocaml/ctypes.depend +++ b/ocaml/ctypes.depend @@ -1,4 +1,4 @@ -CTYPES_DEPS=lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Spec_stubs.cmx lib/Hacl_Spec_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2s_stubs.cmx lib/Hacl_Hash_Blake2s_bindings.cmx lib/Hacl_Hash_Blake2b_Simd256_stubs.cmx lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib/Hacl_Hash_Blake2s_Simd128_stubs.cmx lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib/Hacl_Hash_Base_stubs.cmx lib/Hacl_Hash_Base_bindings.cmx lib/Hacl_Hash_SHA1_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmx lib/Hacl_Hash_SHA2_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmx lib/Hacl_HMAC_stubs.cmx lib/Hacl_HMAC_bindings.cmx lib/Hacl_HMAC_Blake2s_128_stubs.cmx lib/Hacl_HMAC_Blake2s_128_bindings.cmx lib/Hacl_HMAC_Blake2b_256_stubs.cmx lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib/Hacl_Hash_SHA3_stubs.cmx lib/Hacl_Hash_SHA3_bindings.cmx lib/Hacl_SHA2_Types_stubs.cmx lib/Hacl_SHA2_Types_bindings.cmx lib/Hacl_Hash_SHA3_Simd256_stubs.cmx lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib/Hacl_Hash_MD5_stubs.cmx lib/Hacl_Hash_MD5_bindings.cmx lib/EverCrypt_Error_stubs.cmx lib/EverCrypt_Error_bindings.cmx lib/EverCrypt_AutoConfig2_stubs.cmx lib/EverCrypt_AutoConfig2_bindings.cmx lib/EverCrypt_Hash_stubs.cmx lib/EverCrypt_Hash_bindings.cmx lib/Hacl_Chacha20_stubs.cmx lib/Hacl_Chacha20_bindings.cmx lib/Hacl_Salsa20_stubs.cmx lib/Hacl_Salsa20_bindings.cmx lib/Hacl_Bignum_Base_stubs.cmx lib/Hacl_Bignum_Base_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_bindings.cmx lib/Hacl_Curve25519_64_stubs.cmx lib/Hacl_Curve25519_64_bindings.cmx lib/Hacl_Bignum25519_51_stubs.cmx lib/Hacl_Bignum25519_51_bindings.cmx lib/Hacl_Curve25519_51_stubs.cmx lib/Hacl_Curve25519_51_bindings.cmx lib/Hacl_MAC_Poly1305_stubs.cmx 
lib/Hacl_MAC_Poly1305_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_bindings.cmx lib/Hacl_MAC_Poly1305_Simd128_stubs.cmx lib/Hacl_MAC_Poly1305_Simd128_bindings.cmx lib/Hacl_Chacha20_Vec128_stubs.cmx lib/Hacl_Chacha20_Vec128_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_bindings.cmx lib/Hacl_MAC_Poly1305_Simd256_stubs.cmx lib/Hacl_MAC_Poly1305_Simd256_bindings.cmx lib/Hacl_Chacha20_Vec256_stubs.cmx lib/Hacl_Chacha20_Vec256_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_bindings.cmx lib/Hacl_Ed25519_stubs.cmx lib/Hacl_Ed25519_bindings.cmx lib/Hacl_NaCl_stubs.cmx lib/Hacl_NaCl_bindings.cmx lib/Hacl_P256_stubs.cmx lib/Hacl_P256_bindings.cmx lib/Hacl_Bignum_K256_stubs.cmx lib/Hacl_Bignum_K256_bindings.cmx lib/Hacl_K256_ECDSA_stubs.cmx lib/Hacl_K256_ECDSA_bindings.cmx lib/Hacl_Frodo_KEM_stubs.cmx lib/Hacl_Frodo_KEM_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_IntTypes_Intrinsics_stubs.cmx lib/Hacl_IntTypes_Intrinsics_bindings.cmx lib/Hacl_IntTypes_Intrinsics_128_stubs.cmx lib/Hacl_IntTypes_Intrinsics_128_bindings.cmx lib/Hacl_RSAPSS_stubs.cmx lib/Hacl_RSAPSS_bindings.cmx lib/Hacl_FFDHE_stubs.cmx lib/Hacl_FFDHE_bindings.cmx lib/Hacl_Frodo640_stubs.cmx lib/Hacl_Frodo640_bindings.cmx lib/Hacl_HKDF_stubs.cmx lib/Hacl_HKDF_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_bindings.cmx lib/EverCrypt_Cipher_stubs.cmx lib/EverCrypt_Cipher_bindings.cmx lib/Hacl_GenericField32_stubs.cmx lib/Hacl_GenericField32_bindings.cmx lib/Hacl_SHA2_Vec256_stubs.cmx lib/Hacl_SHA2_Vec256_bindings.cmx lib/Hacl_EC_K256_stubs.cmx lib/Hacl_EC_K256_bindings.cmx lib/Hacl_Bignum4096_stubs.cmx lib/Hacl_Bignum4096_bindings.cmx lib/Hacl_Chacha20_Vec32_stubs.cmx lib/Hacl_Chacha20_Vec32_bindings.cmx 
lib/EverCrypt_Ed25519_stubs.cmx lib/EverCrypt_Ed25519_bindings.cmx lib/Hacl_Bignum4096_32_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmx lib/EverCrypt_HMAC_stubs.cmx lib/EverCrypt_HMAC_bindings.cmx lib/Hacl_HMAC_DRBG_stubs.cmx lib/Hacl_HMAC_DRBG_bindings.cmx lib/EverCrypt_DRBG_stubs.cmx lib/EverCrypt_DRBG_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP128_SHA256_bindings.cmx lib/EverCrypt_Curve25519_stubs.cmx lib/EverCrypt_Curve25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_bindings.cmx lib/Hacl_Frodo976_stubs.cmx lib/Hacl_Frodo976_bindings.cmx lib/Hacl_HKDF_Blake2s_128_stubs.cmx lib/Hacl_HKDF_Blake2s_128_bindings.cmx lib/Hacl_GenericField64_stubs.cmx lib/Hacl_GenericField64_bindings.cmx lib/Hacl_Frodo1344_stubs.cmx lib/Hacl_Frodo1344_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_bindings.cmx lib/Hacl_Bignum32_stubs.cmx lib/Hacl_Bignum32_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_bindings.cmx lib/Hacl_Bignum256_32_stubs.cmx lib/Hacl_Bignum256_32_bindings.cmx lib/Hacl_SHA2_Vec128_stubs.cmx lib/Hacl_SHA2_Vec128_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib/EverCrypt_Poly1305_stubs.cmx lib/EverCrypt_Poly1305_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_P256_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP32_SHA256_bindings.cmx lib/Hacl_Bignum64_stubs.cmx lib/Hacl_Bignum64_bindings.cmx 
lib/Hacl_Frodo64_stubs.cmx lib/Hacl_Frodo64_bindings.cmx lib/Hacl_HKDF_Blake2b_256_stubs.cmx lib/Hacl_HKDF_Blake2b_256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_bindings.cmx lib/EverCrypt_HKDF_stubs.cmx lib/EverCrypt_HKDF_bindings.cmx lib/Hacl_EC_Ed25519_stubs.cmx lib/Hacl_EC_Ed25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_bindings.cmx lib/EverCrypt_Chacha20Poly1305_stubs.cmx lib/EverCrypt_Chacha20Poly1305_bindings.cmx lib/EverCrypt_AEAD_stubs.cmx lib/EverCrypt_AEAD_bindings.cmx lib/Hacl_Bignum256_stubs.cmx lib/Hacl_Bignum256_bindings.cmx +CTYPES_DEPS=lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Spec_stubs.cmx lib/Hacl_Spec_bindings.cmx lib/Hacl_Hash_Blake2b_stubs.cmx lib/Hacl_Hash_Blake2b_bindings.cmx lib/Hacl_Hash_Blake2s_stubs.cmx lib/Hacl_Hash_Blake2s_bindings.cmx lib/Hacl_Hash_Blake2b_Simd256_stubs.cmx lib/Hacl_Hash_Blake2b_Simd256_bindings.cmx lib/Hacl_Hash_Blake2s_Simd128_stubs.cmx lib/Hacl_Hash_Blake2s_Simd128_bindings.cmx lib/Hacl_Hash_Base_stubs.cmx lib/Hacl_Hash_Base_bindings.cmx lib/Hacl_Hash_MD5_stubs.cmx lib/Hacl_Hash_MD5_bindings.cmx lib/Hacl_Hash_SHA1_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmx lib/Hacl_Hash_SHA3_stubs.cmx lib/Hacl_Hash_SHA3_bindings.cmx lib/Hacl_Hash_SHA2_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmx lib/Hacl_HMAC_stubs.cmx lib/Hacl_HMAC_bindings.cmx lib/Hacl_HMAC_Blake2s_128_stubs.cmx lib/Hacl_HMAC_Blake2s_128_bindings.cmx lib/Hacl_HMAC_Blake2b_256_stubs.cmx lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib/Hacl_SHA2_Types_stubs.cmx lib/Hacl_SHA2_Types_bindings.cmx lib/Hacl_Hash_SHA3_Simd256_stubs.cmx lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib/EverCrypt_Error_stubs.cmx lib/EverCrypt_Error_bindings.cmx lib/EverCrypt_AutoConfig2_stubs.cmx lib/EverCrypt_AutoConfig2_bindings.cmx lib/EverCrypt_Hash_stubs.cmx 
lib/EverCrypt_Hash_bindings.cmx lib/Hacl_Chacha20_stubs.cmx lib/Hacl_Chacha20_bindings.cmx lib/Hacl_Salsa20_stubs.cmx lib/Hacl_Salsa20_bindings.cmx lib/Hacl_Bignum_Base_stubs.cmx lib/Hacl_Bignum_Base_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_bindings.cmx lib/Hacl_Curve25519_64_stubs.cmx lib/Hacl_Curve25519_64_bindings.cmx lib/Hacl_Bignum25519_51_stubs.cmx lib/Hacl_Bignum25519_51_bindings.cmx lib/Hacl_Curve25519_51_stubs.cmx lib/Hacl_Curve25519_51_bindings.cmx lib/Hacl_MAC_Poly1305_stubs.cmx lib/Hacl_MAC_Poly1305_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_bindings.cmx lib/Hacl_MAC_Poly1305_Simd128_stubs.cmx lib/Hacl_MAC_Poly1305_Simd128_bindings.cmx lib/Hacl_Chacha20_Vec128_stubs.cmx lib/Hacl_Chacha20_Vec128_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd128_bindings.cmx lib/Hacl_MAC_Poly1305_Simd256_stubs.cmx lib/Hacl_MAC_Poly1305_Simd256_bindings.cmx lib/Hacl_Chacha20_Vec256_stubs.cmx lib/Hacl_Chacha20_Vec256_bindings.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_stubs.cmx lib/Hacl_AEAD_Chacha20Poly1305_Simd256_bindings.cmx lib/Hacl_Ed25519_stubs.cmx lib/Hacl_Ed25519_bindings.cmx lib/Hacl_NaCl_stubs.cmx lib/Hacl_NaCl_bindings.cmx lib/Hacl_P256_stubs.cmx lib/Hacl_P256_bindings.cmx lib/Hacl_Bignum_K256_stubs.cmx lib/Hacl_Bignum_K256_bindings.cmx lib/Hacl_K256_ECDSA_stubs.cmx lib/Hacl_K256_ECDSA_bindings.cmx lib/Hacl_Frodo_KEM_stubs.cmx lib/Hacl_Frodo_KEM_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_IntTypes_Intrinsics_stubs.cmx lib/Hacl_IntTypes_Intrinsics_bindings.cmx lib/Hacl_IntTypes_Intrinsics_128_stubs.cmx lib/Hacl_IntTypes_Intrinsics_128_bindings.cmx lib/Hacl_RSAPSS_stubs.cmx lib/Hacl_RSAPSS_bindings.cmx lib/Hacl_FFDHE_stubs.cmx lib/Hacl_FFDHE_bindings.cmx lib/Hacl_Frodo640_stubs.cmx lib/Hacl_Frodo640_bindings.cmx lib/Hacl_HKDF_stubs.cmx 
lib/Hacl_HKDF_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA512_bindings.cmx lib/EverCrypt_Cipher_stubs.cmx lib/EverCrypt_Cipher_bindings.cmx lib/Hacl_GenericField32_stubs.cmx lib/Hacl_GenericField32_bindings.cmx lib/Hacl_SHA2_Vec256_stubs.cmx lib/Hacl_SHA2_Vec256_bindings.cmx lib/Hacl_EC_K256_stubs.cmx lib/Hacl_EC_K256_bindings.cmx lib/Hacl_Bignum4096_stubs.cmx lib/Hacl_Bignum4096_bindings.cmx lib/EverCrypt_Ed25519_stubs.cmx lib/EverCrypt_Ed25519_bindings.cmx lib/Hacl_Chacha20_Vec32_stubs.cmx lib/Hacl_Chacha20_Vec32_bindings.cmx lib/Hacl_Bignum4096_32_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmx lib/EverCrypt_HMAC_stubs.cmx lib/EverCrypt_HMAC_bindings.cmx lib/Hacl_HMAC_DRBG_stubs.cmx lib/Hacl_HMAC_DRBG_bindings.cmx lib/EverCrypt_DRBG_stubs.cmx lib/EverCrypt_DRBG_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP128_SHA256_bindings.cmx lib/EverCrypt_Curve25519_stubs.cmx lib/EverCrypt_Curve25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA512_bindings.cmx lib/Hacl_Frodo976_stubs.cmx lib/Hacl_Frodo976_bindings.cmx lib/Hacl_HKDF_Blake2s_128_stubs.cmx lib/Hacl_HKDF_Blake2s_128_bindings.cmx lib/Hacl_GenericField64_stubs.cmx lib/Hacl_GenericField64_bindings.cmx lib/Hacl_Frodo1344_stubs.cmx lib/Hacl_Frodo1344_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA512_bindings.cmx lib/Hacl_Bignum32_stubs.cmx lib/Hacl_Bignum32_bindings.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP128_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP128_SHA256_bindings.cmx lib/Hacl_Bignum256_32_stubs.cmx lib/Hacl_Bignum256_32_bindings.cmx lib/Hacl_SHA2_Vec128_stubs.cmx lib/Hacl_SHA2_Vec128_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA256_stubs.cmx 
lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib/EverCrypt_Poly1305_stubs.cmx lib/EverCrypt_Poly1305_bindings.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx lib/Hacl_HPKE_P256_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP256_SHA256_bindings.cmx lib/Hacl_HPKE_P256_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_P256_CP32_SHA256_bindings.cmx lib/Hacl_Bignum64_stubs.cmx lib/Hacl_Bignum64_bindings.cmx lib/Hacl_Frodo64_stubs.cmx lib/Hacl_Frodo64_bindings.cmx lib/Hacl_HKDF_Blake2b_256_stubs.cmx lib/Hacl_HKDF_Blake2b_256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA256_bindings.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_stubs.cmx lib/Hacl_HPKE_Curve64_CP32_SHA512_bindings.cmx lib/EverCrypt_HKDF_stubs.cmx lib/EverCrypt_HKDF_bindings.cmx lib/Hacl_EC_Ed25519_stubs.cmx lib/Hacl_EC_Ed25519_bindings.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_stubs.cmx lib/Hacl_HPKE_Curve51_CP256_SHA256_bindings.cmx lib/EverCrypt_Chacha20Poly1305_stubs.cmx lib/EverCrypt_Chacha20Poly1305_bindings.cmx lib/EverCrypt_AEAD_stubs.cmx lib/EverCrypt_AEAD_bindings.cmx lib/Hacl_Bignum256_stubs.cmx lib/Hacl_Bignum256_bindings.cmx lib/Hacl_Streaming_Types_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmo: lib_gen/Hacl_Streaming_Types_gen.cmx: lib/Hacl_Streaming_Types_bindings.cmx @@ -27,10 +27,18 @@ lib/Hacl_Hash_Base_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_ lib/Hacl_Hash_Base_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib_gen/Hacl_Hash_Base_gen.cmx: lib/Hacl_Hash_Base_bindings.cmx lib_gen/Hacl_Hash_Base_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_Base_bindings.cmx lib_gen/Hacl_Hash_Base_gen.cmx +lib/Hacl_Hash_MD5_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx 
lib/Hacl_Streaming_Types_stubs.cmx +lib/Hacl_Hash_MD5_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo +lib_gen/Hacl_Hash_MD5_gen.cmx: lib/Hacl_Hash_MD5_bindings.cmx +lib_gen/Hacl_Hash_MD5_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_MD5_bindings.cmx lib_gen/Hacl_Hash_MD5_gen.cmx lib/Hacl_Hash_SHA1_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Hash_SHA1_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib_gen/Hacl_Hash_SHA1_gen.cmx: lib/Hacl_Hash_SHA1_bindings.cmx lib_gen/Hacl_Hash_SHA1_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_SHA1_bindings.cmx lib_gen/Hacl_Hash_SHA1_gen.cmx +lib/Hacl_Hash_SHA3_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx +lib/Hacl_Hash_SHA3_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo +lib_gen/Hacl_Hash_SHA3_gen.cmx: lib/Hacl_Hash_SHA3_bindings.cmx +lib_gen/Hacl_Hash_SHA3_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_SHA3_bindings.cmx lib_gen/Hacl_Hash_SHA3_gen.cmx lib/Hacl_Hash_SHA2_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Hash_SHA2_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo lib_gen/Hacl_Hash_SHA2_gen.cmx: lib/Hacl_Hash_SHA2_bindings.cmx @@ -47,10 +55,6 @@ lib/Hacl_HMAC_Blake2b_256_bindings.cmx: lib/Hacl_HMAC_Blake2b_256_bindings.cmo: lib_gen/Hacl_HMAC_Blake2b_256_gen.cmx: lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib_gen/Hacl_HMAC_Blake2b_256_gen.exe: lib/Hacl_HMAC_Blake2b_256_bindings.cmx lib_gen/Hacl_HMAC_Blake2b_256_gen.cmx -lib/Hacl_Hash_SHA3_bindings.cmx: 
lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx -lib/Hacl_Hash_SHA3_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo -lib_gen/Hacl_Hash_SHA3_gen.cmx: lib/Hacl_Hash_SHA3_bindings.cmx -lib_gen/Hacl_Hash_SHA3_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_SHA3_bindings.cmx lib_gen/Hacl_Hash_SHA3_gen.cmx lib/Hacl_SHA2_Types_bindings.cmx: lib/Hacl_SHA2_Types_bindings.cmo: lib_gen/Hacl_SHA2_Types_gen.cmx: lib/Hacl_SHA2_Types_bindings.cmx @@ -59,10 +63,6 @@ lib/Hacl_Hash_SHA3_Simd256_bindings.cmx: lib/Hacl_Hash_SHA3_Simd256_bindings.cmo: lib_gen/Hacl_Hash_SHA3_Simd256_gen.cmx: lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib_gen/Hacl_Hash_SHA3_Simd256_gen.exe: lib/Hacl_Hash_SHA3_Simd256_bindings.cmx lib_gen/Hacl_Hash_SHA3_Simd256_gen.cmx -lib/Hacl_Hash_MD5_bindings.cmx: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx -lib/Hacl_Hash_MD5_bindings.cmo: lib/Hacl_Streaming_Types_bindings.cmo lib/Hacl_Streaming_Types_stubs.cmo -lib_gen/Hacl_Hash_MD5_gen.cmx: lib/Hacl_Hash_MD5_bindings.cmx -lib_gen/Hacl_Hash_MD5_gen.exe: lib/Hacl_Streaming_Types_bindings.cmx lib/Hacl_Streaming_Types_stubs.cmx lib/Hacl_Streaming_Types_c_stubs.o lib/Hacl_Hash_MD5_bindings.cmx lib_gen/Hacl_Hash_MD5_gen.cmx lib/EverCrypt_Error_bindings.cmx: lib/EverCrypt_Error_bindings.cmo: lib_gen/EverCrypt_Error_gen.cmx: lib/EverCrypt_Error_bindings.cmx @@ -211,14 +211,14 @@ lib/Hacl_Bignum4096_bindings.cmx: lib/Hacl_Bignum_bindings.cmx lib/Hacl_Bignum_s lib/Hacl_Bignum4096_bindings.cmo: lib/Hacl_Bignum_bindings.cmo lib/Hacl_Bignum_stubs.cmo lib_gen/Hacl_Bignum4096_gen.cmx: lib/Hacl_Bignum4096_bindings.cmx lib_gen/Hacl_Bignum4096_gen.exe: lib/Hacl_Bignum_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum_c_stubs.o lib/Hacl_Bignum4096_bindings.cmx lib_gen/Hacl_Bignum4096_gen.cmx -lib/Hacl_Chacha20_Vec32_bindings.cmx: 
-lib/Hacl_Chacha20_Vec32_bindings.cmo: -lib_gen/Hacl_Chacha20_Vec32_gen.cmx: lib/Hacl_Chacha20_Vec32_bindings.cmx -lib_gen/Hacl_Chacha20_Vec32_gen.exe: lib/Hacl_Chacha20_Vec32_bindings.cmx lib_gen/Hacl_Chacha20_Vec32_gen.cmx lib/EverCrypt_Ed25519_bindings.cmx: lib/EverCrypt_Ed25519_bindings.cmo: lib_gen/EverCrypt_Ed25519_gen.cmx: lib/EverCrypt_Ed25519_bindings.cmx lib_gen/EverCrypt_Ed25519_gen.exe: lib/EverCrypt_Ed25519_bindings.cmx lib_gen/EverCrypt_Ed25519_gen.cmx +lib/Hacl_Chacha20_Vec32_bindings.cmx: +lib/Hacl_Chacha20_Vec32_bindings.cmo: +lib_gen/Hacl_Chacha20_Vec32_gen.cmx: lib/Hacl_Chacha20_Vec32_bindings.cmx +lib_gen/Hacl_Chacha20_Vec32_gen.exe: lib/Hacl_Chacha20_Vec32_bindings.cmx lib_gen/Hacl_Chacha20_Vec32_gen.cmx lib/Hacl_Bignum4096_32_bindings.cmx: lib/Hacl_Bignum_bindings.cmx lib/Hacl_Bignum_stubs.cmx lib/Hacl_Bignum4096_32_bindings.cmo: lib/Hacl_Bignum_bindings.cmo lib/Hacl_Bignum_stubs.cmo lib_gen/Hacl_Bignum4096_32_gen.cmx: lib/Hacl_Bignum4096_32_bindings.cmx @@ -295,14 +295,14 @@ lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Imp lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo lib_gen/Hacl_HPKE_Curve51_CP32_SHA256_gen.cmx: lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve51_CP32_SHA256_gen.exe: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_c_stubs.o lib/Hacl_HPKE_Curve51_CP32_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve51_CP32_SHA256_gen.cmx -lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx -lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo 
lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo -lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx: lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx -lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.exe: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_c_stubs.o lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx lib/EverCrypt_Poly1305_bindings.cmx: lib/EverCrypt_Poly1305_bindings.cmo: lib_gen/EverCrypt_Poly1305_gen.cmx: lib/EverCrypt_Poly1305_bindings.cmx lib_gen/EverCrypt_Poly1305_gen.exe: lib/EverCrypt_Poly1305_bindings.cmx lib_gen/EverCrypt_Poly1305_gen.cmx +lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx +lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo +lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx: lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx +lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.exe: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_c_stubs.o lib/Hacl_HPKE_Curve64_CP256_SHA256_bindings.cmx lib_gen/Hacl_HPKE_Curve64_CP256_SHA256_gen.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmx lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmx lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmo: lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_bindings.cmo lib/Hacl_HPKE_Interface_Hacl_Impl_HPKE_Hacl_Meta_HPKE_stubs.cmo lib_gen/Hacl_HPKE_Curve51_CP32_SHA512_gen.cmx: 
lib/Hacl_HPKE_Curve51_CP32_SHA512_bindings.cmx diff --git a/ocaml/lib/Hacl_HMAC_bindings.ml b/ocaml/lib/Hacl_HMAC_bindings.ml index 725d49b5..869e5c19 100644 --- a/ocaml/lib/Hacl_HMAC_bindings.ml +++ b/ocaml/lib/Hacl_HMAC_bindings.ml @@ -2,11 +2,21 @@ open Ctypes module Bindings(F:Cstubs.FOREIGN) = struct open F + let hacl_HMAC_compute_md5 = + foreign "Hacl_HMAC_compute_md5" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) let hacl_HMAC_compute_sha1 = foreign "Hacl_HMAC_compute_sha1" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) + let hacl_HMAC_compute_sha2_224 = + foreign "Hacl_HMAC_compute_sha2_224" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) let hacl_HMAC_compute_sha2_256 = foreign "Hacl_HMAC_compute_sha2_256" (ocaml_bytes @-> @@ -22,6 +32,26 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) + let hacl_HMAC_compute_sha3_224 = + foreign "Hacl_HMAC_compute_sha3_224" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) + let hacl_HMAC_compute_sha3_256 = + foreign "Hacl_HMAC_compute_sha3_256" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) + let hacl_HMAC_compute_sha3_384 = + foreign "Hacl_HMAC_compute_sha3_384" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) + let hacl_HMAC_compute_sha3_512 = + foreign "Hacl_HMAC_compute_sha3_512" + (ocaml_bytes @-> + (ocaml_bytes @-> + (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void)))))) let hacl_HMAC_compute_blake2s_32 = foreign "Hacl_HMAC_compute_blake2s_32" (ocaml_bytes @-> diff --git a/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml 
index 1c132a7a..8fdc5be6 100644 --- a/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2b_Simd256_bindings.ml @@ -15,8 +15,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas" + let hacl_Hash_Blake2b_Simd256_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml index 7ba4fcf6..d57e8b56 100644 --- a/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2b_bindings.ml @@ -39,22 +39,26 @@ module Bindings(F:Cstubs.FOREIGN) = field hacl_Hash_Blake2b_index "key_length" uint8_t let hacl_Hash_Blake2b_index_digest_length = field hacl_Hash_Blake2b_index "digest_length" uint8_t + let hacl_Hash_Blake2b_index_last_node = + field hacl_Hash_Blake2b_index "last_node" bool let _ = seal hacl_Hash_Blake2b_index + type hacl_Hash_Blake2b_params_and_key = + [ `hacl_Hash_Blake2b_params_and_key ] structure + let (hacl_Hash_Blake2b_params_and_key : + [ `hacl_Hash_Blake2b_params_and_key ] structure typ) = + structure "Hacl_Hash_Blake2b_params_and_key_s" + let hacl_Hash_Blake2b_params_and_key_fst = + field hacl_Hash_Blake2b_params_and_key "fst" + (ptr hacl_Hash_Blake2b_blake2_params) + let hacl_Hash_Blake2b_params_and_key_snd = + field hacl_Hash_Blake2b_params_and_key "snd" (ptr uint8_t) + let _ = seal hacl_Hash_Blake2b_params_and_key let hacl_Hash_Blake2b_init = foreign "Hacl_Hash_Blake2b_init" ((ptr uint64_t) @-> (uint32_t @-> (uint32_t @-> (returning void)))) let hacl_Hash_Blake2b_finish = foreign "Hacl_Hash_Blake2b_finish" (uint32_t @-> (ocaml_bytes @-> ((ptr uint64_t) @-> (returning void)))) - type k____uint64_t___uint64_t_ = [ `k____uint64_t___uint64_t_ ] structure - let 
(k____uint64_t___uint64_t_ : - [ `k____uint64_t___uint64_t_ ] structure typ) = - structure "K____uint64_t___uint64_t__s" - let k____uint64_t___uint64_t__fst = - field k____uint64_t___uint64_t_ "fst" (ptr uint64_t) - let k____uint64_t___uint64_t__snd = - field k____uint64_t___uint64_t_ "snd" (ptr uint64_t) - let _ = seal k____uint64_t___uint64_t_ type hacl_Hash_Blake2b_block_state_t = [ `hacl_Hash_Blake2b_block_state_t ] structure let (hacl_Hash_Blake2b_block_state_t : @@ -65,7 +69,11 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_block_state_t_snd = field hacl_Hash_Blake2b_block_state_t "snd" uint8_t let hacl_Hash_Blake2b_block_state_t_thd = - field hacl_Hash_Blake2b_block_state_t "thd" k____uint64_t___uint64_t_ + field hacl_Hash_Blake2b_block_state_t "thd" bool + let hacl_Hash_Blake2b_block_state_t_f3 = + field hacl_Hash_Blake2b_block_state_t "f3" (ptr uint64_t) + let hacl_Hash_Blake2b_block_state_t_f4 = + field hacl_Hash_Blake2b_block_state_t "f4" (ptr uint64_t) let _ = seal hacl_Hash_Blake2b_block_state_t type hacl_Hash_Blake2b_state_t = [ `hacl_Hash_Blake2b_state_t ] structure let (hacl_Hash_Blake2b_state_t : @@ -82,7 +90,8 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_malloc_with_params_and_key = foreign "Hacl_Hash_Blake2b_malloc_with_params_and_key" ((ptr hacl_Hash_Blake2b_blake2_params) @-> - (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2b_state_t)))) + (bool @-> + (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2b_state_t))))) let hacl_Hash_Blake2b_malloc_with_key = foreign "Hacl_Hash_Blake2b_malloc_with_key" (ocaml_bytes @-> @@ -110,7 +119,11 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2b_digest = foreign "Hacl_Hash_Blake2b_digest" ((ptr hacl_Hash_Blake2b_state_t) @-> - (ocaml_bytes @-> (returning void))) + (ocaml_bytes @-> (returning uint8_t))) + let hacl_Hash_Blake2b_info = + foreign "Hacl_Hash_Blake2b_info" + ((ptr hacl_Hash_Blake2b_state_t) @-> + (returning hacl_Hash_Blake2b_index)) let 
hacl_Hash_Blake2b_free = foreign "Hacl_Hash_Blake2b_free" ((ptr hacl_Hash_Blake2b_state_t) @-> (returning void)) @@ -125,8 +138,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2b_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2b_hash_with_key_and_paramas" + let hacl_Hash_Blake2b_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2b_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml index 6533ddbc..75fbbf39 100644 --- a/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2s_Simd128_bindings.ml @@ -15,8 +15,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas" + let hacl_Hash_Blake2s_Simd128_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml b/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml index f6c93e89..b40e2a00 100644 --- a/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml +++ b/ocaml/lib/Hacl_Hash_Blake2s_bindings.ml @@ -23,20 +23,12 @@ module Bindings(F:Cstubs.FOREIGN) = (uint32_t @-> ((ptr uint32_t) @-> ((ptr uint32_t) @-> - (uint64_t @-> - (uint32_t @-> (ocaml_bytes @-> (returning void))))))) + (bool @-> + (uint64_t @-> + (uint32_t @-> (ocaml_bytes @-> (returning void)))))))) let hacl_Hash_Blake2s_finish = foreign "Hacl_Hash_Blake2s_finish" (uint32_t @-> (ocaml_bytes @-> ((ptr uint32_t) @-> (returning void)))) - type k____uint32_t___uint32_t_ = [ `k____uint32_t___uint32_t_ ] structure - let (k____uint32_t___uint32_t_ : - [ `k____uint32_t___uint32_t_ ] structure typ) = - structure 
"K____uint32_t___uint32_t__s" - let k____uint32_t___uint32_t__fst = - field k____uint32_t___uint32_t_ "fst" (ptr uint32_t) - let k____uint32_t___uint32_t__snd = - field k____uint32_t___uint32_t_ "snd" (ptr uint32_t) - let _ = seal k____uint32_t___uint32_t_ type hacl_Hash_Blake2s_block_state_t = [ `hacl_Hash_Blake2s_block_state_t ] structure let (hacl_Hash_Blake2s_block_state_t : @@ -47,7 +39,11 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_block_state_t_snd = field hacl_Hash_Blake2s_block_state_t "snd" uint8_t let hacl_Hash_Blake2s_block_state_t_thd = - field hacl_Hash_Blake2s_block_state_t "thd" k____uint32_t___uint32_t_ + field hacl_Hash_Blake2s_block_state_t "thd" bool + let hacl_Hash_Blake2s_block_state_t_f3 = + field hacl_Hash_Blake2s_block_state_t "f3" (ptr uint32_t) + let hacl_Hash_Blake2s_block_state_t_f4 = + field hacl_Hash_Blake2s_block_state_t "f4" (ptr uint32_t) let _ = seal hacl_Hash_Blake2s_block_state_t type hacl_Hash_Blake2s_state_t = [ `hacl_Hash_Blake2s_state_t ] structure let (hacl_Hash_Blake2s_state_t : @@ -64,7 +60,8 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_malloc_with_params_and_key = foreign "Hacl_Hash_Blake2s_malloc_with_params_and_key" ((ptr hacl_Hash_Blake2b_blake2_params) @-> - (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2s_state_t)))) + (bool @-> + (ocaml_bytes @-> (returning (ptr hacl_Hash_Blake2s_state_t))))) let hacl_Hash_Blake2s_malloc_with_key = foreign "Hacl_Hash_Blake2s_malloc_with_key" (ocaml_bytes @-> @@ -92,7 +89,11 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_Blake2s_digest = foreign "Hacl_Hash_Blake2s_digest" ((ptr hacl_Hash_Blake2s_state_t) @-> - (ocaml_bytes @-> (returning void))) + (ocaml_bytes @-> (returning uint8_t))) + let hacl_Hash_Blake2s_info = + foreign "Hacl_Hash_Blake2s_info" + ((ptr hacl_Hash_Blake2s_state_t) @-> + (returning hacl_Hash_Blake2b_index)) let hacl_Hash_Blake2s_free = foreign "Hacl_Hash_Blake2s_free" ((ptr hacl_Hash_Blake2s_state_t) @-> (returning void)) 
@@ -107,8 +108,8 @@ module Bindings(F:Cstubs.FOREIGN) = (ocaml_bytes @-> (uint32_t @-> (ocaml_bytes @-> (uint32_t @-> (returning void))))))) - let hacl_Hash_Blake2s_hash_with_key_and_paramas = - foreign "Hacl_Hash_Blake2s_hash_with_key_and_paramas" + let hacl_Hash_Blake2s_hash_with_key_and_params = + foreign "Hacl_Hash_Blake2s_hash_with_key_and_params" (ocaml_bytes @-> (ocaml_bytes @-> (uint32_t @-> diff --git a/ocaml/lib/Hacl_Hash_SHA2_bindings.ml b/ocaml/lib/Hacl_Hash_SHA2_bindings.ml index f0573724..7475a850 100644 --- a/ocaml/lib/Hacl_Hash_SHA2_bindings.ml +++ b/ocaml/lib/Hacl_Hash_SHA2_bindings.ml @@ -22,6 +22,9 @@ module Bindings(F:Cstubs.FOREIGN) = let hacl_Hash_SHA2_sha224_init = foreign "Hacl_Hash_SHA2_sha224_init" ((ptr uint32_t) @-> (returning void)) + let hacl_Hash_SHA2_sha224_update_nblocks = + foreign "Hacl_Hash_SHA2_sha224_update_nblocks" + (uint32_t @-> (ocaml_bytes @-> ((ptr uint32_t) @-> (returning void)))) let hacl_Hash_SHA2_sha224_update_last = foreign "Hacl_Hash_SHA2_sha224_update_last" (uint64_t @-> diff --git a/src/EverCrypt_AEAD.c b/src/EverCrypt_AEAD.c index b0fb4826..89965054 100644 --- a/src/EverCrypt_AEAD.c +++ b/src/EverCrypt_AEAD.c @@ -538,26 +538,27 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm_no_check( KRML_MAYBE_UNUSED_VAR(cipher); KRML_MAYBE_UNUSED_VAR(tag); #if HACL_CAN_COMPILE_VALE - uint8_t ek[480U] = { 0U }; - uint8_t *keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 176U; + uint8_t ek0[480U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 176U; aes128_key_expansion(k, keys_b0); aes128_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - 
KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 304U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 304U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 176U; uint8_t tmp_iv[16U] = { 0U }; @@ -637,8 +638,9 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm_no_check( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, (uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; #else KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", @@ -680,26 +682,27 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm_no_check( KRML_MAYBE_UNUSED_VAR(cipher); KRML_MAYBE_UNUSED_VAR(tag); #if HACL_CAN_COMPILE_VALE - uint8_t ek[544U] = { 0U }; - uint8_t *keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 240U; + uint8_t ek0[544U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 240U; aes256_key_expansion(k, keys_b0); aes256_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 368U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 368U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 240U; uint8_t tmp_iv[16U] = { 0U }; @@ -779,8 +782,9 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm_no_check( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, 
(uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; #else KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", @@ -821,26 +825,27 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm( bool has_aesni = EverCrypt_AutoConfig2_has_aesni(); if (has_aesni && has_pclmulqdq && has_avx && has_sse && has_movbe) { - uint8_t ek[480U] = { 0U }; - uint8_t *keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 176U; + uint8_t ek0[480U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 176U; aes128_key_expansion(k, keys_b0); aes128_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 304U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 304U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 176U; uint8_t tmp_iv[16U] = { 0U }; @@ -920,8 +925,9 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, (uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; } return EverCrypt_Error_UnsupportedAlgorithm; @@ -960,26 +966,27 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm( bool has_aesni = EverCrypt_AutoConfig2_has_aesni(); if (has_aesni && has_pclmulqdq && has_avx && has_sse && has_movbe) { - uint8_t ek[544U] = { 0U }; - uint8_t 
*keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 240U; + uint8_t ek0[544U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 240U; aes256_key_expansion(k, keys_b0); aes256_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 368U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 368U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 240U; uint8_t tmp_iv[16U] = { 0U }; @@ -1059,8 +1066,9 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, (uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; } return EverCrypt_Error_UnsupportedAlgorithm; diff --git a/src/EverCrypt_HKDF.c b/src/EverCrypt_HKDF.c index 773f86b8..de54cafc 100644 --- a/src/EverCrypt_HKDF.c +++ b/src/EverCrypt_HKDF.c @@ -43,36 +43,45 @@ expand_sha1( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + 
KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { EverCrypt_HMAC_compute_sha1(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha1(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { EverCrypt_HMAC_compute_sha1(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha1(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -98,36 +107,45 @@ expand_sha2_256( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { EverCrypt_HMAC_compute_sha2_256(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha2_256(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { EverCrypt_HMAC_compute_sha2_256(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); 
EverCrypt_HMAC_compute_sha2_256(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -153,36 +171,45 @@ expand_sha2_384( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { EverCrypt_HMAC_compute_sha2_384(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha2_384(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { EverCrypt_HMAC_compute_sha2_384(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha2_384(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -208,36 +235,45 @@ expand_sha2_512( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof 
(uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { EverCrypt_HMAC_compute_sha2_512(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha2_512(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { EverCrypt_HMAC_compute_sha2_512(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_sha2_512(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -263,36 +299,45 @@ expand_blake2s( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { EverCrypt_HMAC_compute_blake2s(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_blake2s(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 
0U) { EverCrypt_HMAC_compute_blake2s(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_blake2s(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -318,36 +363,45 @@ expand_blake2b( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { EverCrypt_HMAC_compute_blake2b(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_blake2b(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { EverCrypt_HMAC_compute_blake2b(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); EverCrypt_HMAC_compute_blake2b(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } diff --git a/src/EverCrypt_HMAC.c b/src/EverCrypt_HMAC.c index 90bcaaac..ec48f6e0 100644 --- a/src/EverCrypt_HMAC.c +++ b/src/EverCrypt_HMAC.c @@ -81,10 +81,8 @@ EverCrypt_HMAC_compute_sha1( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - 
memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -105,26 +103,23 @@ EverCrypt_HMAC_compute_sha1( { Hacl_Hash_SHA1_hash_oneshot(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } uint32_t s[5U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U, 0xc3d2e1f0U }; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA1_update_last(s, 0ULL, ipad, 64U); @@ -153,6 +148,7 @@ EverCrypt_HMAC_compute_sha1( Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA1_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA1_init(s); @@ -189,10 +185,8 @@ EverCrypt_HMAC_compute_sha2_256( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -213,19 +207,17 @@ EverCrypt_HMAC_compute_sha2_256( { EverCrypt_HMAC_hash_256(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for 
(uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -236,11 +228,10 @@ EverCrypt_HMAC_compute_sha2_256( 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha256_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); @@ -272,6 +263,7 @@ EverCrypt_HMAC_compute_sha2_256( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha256_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha256_init(s); @@ -311,10 +303,8 @@ EverCrypt_HMAC_compute_sha2_384( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -335,19 +325,17 @@ EverCrypt_HMAC_compute_sha2_384( { Hacl_Hash_SHA2_hash_384(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof 
(uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -358,11 +346,10 @@ EverCrypt_HMAC_compute_sha2_384( 0U, 8U, 1U, - uint64_t *os = st; uint64_t x = Hacl_Hash_SHA2_h384[i]; + uint64_t *os = st; os[i] = x;); uint64_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), @@ -400,6 +387,7 @@ EverCrypt_HMAC_compute_sha2_384( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha384_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha384_init(s); @@ -441,10 +429,8 @@ EverCrypt_HMAC_compute_sha2_512( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -465,19 +451,17 @@ EverCrypt_HMAC_compute_sha2_512( { Hacl_Hash_SHA2_hash_512(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -488,11 +472,10 @@ EverCrypt_HMAC_compute_sha2_512( 0U, 8U, 1U, - uint64_t *os = st; uint64_t x = Hacl_Hash_SHA2_h512[i]; + uint64_t *os = st; os[i] = x;); uint64_t *s = st; - uint8_t *dst1 = ipad; if (data_len == 0U) { 
Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), @@ -530,6 +513,7 @@ EverCrypt_HMAC_compute_sha2_512( rem, s); } + uint8_t *dst1 = ipad; Hacl_Hash_SHA2_sha512_finish(s, dst1); uint8_t *hash1 = ipad; Hacl_Hash_SHA2_sha512_init(s); @@ -571,10 +555,8 @@ EverCrypt_HMAC_compute_blake2s( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -595,19 +577,17 @@ EverCrypt_HMAC_compute_blake2s( { Hacl_Hash_Blake2s_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -616,11 +596,10 @@ EverCrypt_HMAC_compute_blake2s( uint32_t s[16U] = { 0U }; Hacl_Hash_Blake2s_init(s, 0U, 32U); uint32_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -655,10 +634,12 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2s_finish(32U, dst1, s0); uint8_t *hash1 = ipad; 
Hacl_Hash_Blake2s_init(s0, 0U, 32U); @@ -693,6 +674,7 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -708,10 +690,8 @@ EverCrypt_HMAC_compute_blake2b( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -732,19 +712,17 @@ EverCrypt_HMAC_compute_blake2b( { Hacl_Hash_Blake2b_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -753,11 +731,16 @@ EverCrypt_HMAC_compute_blake2b( uint64_t s[16U] = { 0U }; Hacl_Hash_Blake2b_init(s, 0U, 64U); uint64_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -792,11 +775,13 @@ EverCrypt_HMAC_compute_blake2b( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, 
rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2b_finish(64U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2b_init(s0, 0U, 64U); @@ -831,6 +816,7 @@ EverCrypt_HMAC_compute_blake2b( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/EverCrypt_Hash.c b/src/EverCrypt_Hash.c index bfafa9be..859909d5 100644 --- a/src/EverCrypt_Hash.c +++ b/src/EverCrypt_Hash.c @@ -616,7 +616,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ { uint32_t *p1 = scrut.case_Blake2S_s; uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; } if (scrut.tag == Blake2S_128_s) @@ -624,7 +624,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Lib_IntVector_Intrinsics_vec128 *p1 = scrut.case_Blake2S_128_s; #if HACL_CAN_COMPILE_VEC128 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; #else KRML_MAYBE_UNUSED_VAR(p1); @@ -638,6 +638,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); @@ -651,6 +652,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_Simd256_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); @@ -1305,6 +1307,7 @@ EverCrypt_Hash_Incremental_state_t KRML_CHECK_SIZE(sizeof (uint8_t), block_len(a)); uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(block_len(a), 
sizeof (uint8_t)); EverCrypt_Hash_state_s *block_state = create_in(a); + init(block_state); EverCrypt_Hash_Incremental_state_t s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; EverCrypt_Hash_Incremental_state_t @@ -1313,7 +1316,6 @@ EverCrypt_Hash_Incremental_state_t EverCrypt_Hash_Incremental_state_t )); p[0U] = s; - init(block_state); return p; } @@ -1322,15 +1324,12 @@ Reset an existing state to the initial hash state with empty data. */ void EverCrypt_Hash_Incremental_reset(EverCrypt_Hash_Incremental_state_t *state) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - uint8_t *buf = scrut.buf; - EverCrypt_Hash_state_s *block_state = scrut.block_state; + EverCrypt_Hash_state_s *block_state = (*state).block_state; Spec_Hash_Definitions_hash_alg i = alg_of_state(block_state); KRML_MAYBE_UNUSED_VAR(i); init(block_state); - EverCrypt_Hash_Incremental_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } /** @@ -1347,9 +1346,8 @@ EverCrypt_Hash_Incremental_update( uint32_t chunk_len ) { - EverCrypt_Hash_Incremental_state_t s = *state; - EverCrypt_Hash_state_s *block_state = s.block_state; - uint64_t total_len = s.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; Spec_Hash_Definitions_hash_alg i1 = alg_of_state(block_state); uint64_t sw; switch (i1) @@ -1448,10 +1446,8 @@ EverCrypt_Hash_Incremental_update( } if (chunk_len <= block_len(i1) - sz) { - EverCrypt_Hash_Incremental_state_t s1 = *state; - EverCrypt_Hash_state_s *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i1) == 0ULL && total_len1 > 0ULL) { @@ -1464,22 +1460,12 @@ EverCrypt_Hash_Incremental_update( uint8_t *buf2 = buf + 
sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - EverCrypt_Hash_Incremental_state_t s1 = *state; - EverCrypt_Hash_state_s *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i1) == 0ULL && total_len1 > 0ULL) { @@ -1492,7 +1478,7 @@ EverCrypt_Hash_Incremental_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - update_multi(block_state1, prevlen, buf, block_len(i1)); + update_multi(block_state, prevlen, buf, block_len(i1)); } uint32_t ite0; if ((uint64_t)chunk_len % (uint64_t)block_len(i1) == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1508,28 +1494,18 @@ EverCrypt_Hash_Incremental_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - update_multi(block_state1, total_len1, data1, data1_len); + update_multi(block_state, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = block_len(i1) - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - EverCrypt_Hash_Incremental_state_t s1 = *state; - EverCrypt_Hash_state_s *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)block_len(i1) == 0ULL && total_len10 > 0ULL) { @@ 
-1539,22 +1515,12 @@ EverCrypt_Hash_Incremental_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)block_len(i1)); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - EverCrypt_Hash_Incremental_state_t s10 = *state; - EverCrypt_Hash_state_s *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i1) == 0ULL && total_len1 > 0ULL) { @@ -1567,7 +1533,7 @@ EverCrypt_Hash_Incremental_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - update_multi(block_state1, prevlen, buf, block_len(i1)); + update_multi(block_state, prevlen, buf0, block_len(i1)); } uint32_t ite0; if @@ -1589,18 +1555,10 @@ EverCrypt_Hash_Incremental_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - update_multi(block_state1, total_len1, data1, data1_len); - uint8_t *dst = buf; + update_multi(block_state, total_len1, data1, data1_len); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (EverCrypt_Hash_Incremental_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } ite = Hacl_Streaming_Types_Success; } @@ -1624,10 +1582,9 @@ EverCrypt_Hash_Incremental_update( static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - 
uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_MD5) == 0ULL && total_len > 0ULL) { @@ -1643,6 +1600,7 @@ static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outpu EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_MD5) == 0U && r > 0U) { @@ -1653,7 +1611,6 @@ static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outpu ite = r % block_len(Spec_Hash_Definitions_MD5); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1662,10 +1619,9 @@ static void digest_md5(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outpu static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA1) == 0ULL && total_len > 0ULL) { @@ -1681,6 +1637,7 @@ static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outp EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA1) == 0U && r > 0U) { @@ -1691,7 +1648,6 @@ static void 
digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outp ite = r % block_len(Spec_Hash_Definitions_SHA1); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1700,10 +1656,9 @@ static void digest_sha1(EverCrypt_Hash_Incremental_state_t *state, uint8_t *outp static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_224) == 0ULL && total_len > 0ULL) @@ -1720,6 +1675,7 @@ static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_224) == 0U && r > 0U) { @@ -1730,7 +1686,6 @@ static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_224); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1739,10 +1694,9 @@ static void digest_sha224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = 
scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_256) == 0ULL && total_len > 0ULL) @@ -1759,6 +1713,7 @@ static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_256) == 0U && r > 0U) { @@ -1769,7 +1724,6 @@ static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_256); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1778,10 +1732,9 @@ static void digest_sha256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_224) == 0ULL && total_len > 0ULL) @@ -1798,6 +1751,7 @@ static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 
block_len(Spec_Hash_Definitions_SHA3_224) == 0U && r > 0U) { @@ -1808,7 +1762,6 @@ static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_224); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1817,10 +1770,9 @@ static void digest_sha3_224(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_256) == 0ULL && total_len > 0ULL) @@ -1837,6 +1789,7 @@ static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_256) == 0U && r > 0U) { @@ -1847,7 +1800,6 @@ static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_256); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1856,10 +1808,9 @@ static void digest_sha3_256(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { 
- EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_384) == 0ULL && total_len > 0ULL) @@ -1876,6 +1827,7 @@ static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_384) == 0U && r > 0U) { @@ -1886,7 +1838,6 @@ static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_384); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1895,10 +1846,9 @@ static void digest_sha3_384(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA3_512) == 0ULL && total_len > 0ULL) @@ -1915,6 +1865,7 @@ static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t * EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - 
(uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA3_512) == 0U && r > 0U) { @@ -1925,7 +1876,6 @@ static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t * ite = r % block_len(Spec_Hash_Definitions_SHA3_512); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1934,10 +1884,9 @@ static void digest_sha3_512(EverCrypt_Hash_Incremental_state_t *state, uint8_t * static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_384) == 0ULL && total_len > 0ULL) @@ -1954,6 +1903,7 @@ static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_384) == 0U && r > 0U) { @@ -1964,7 +1914,6 @@ static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_384); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -1973,10 +1922,9 @@ static void digest_sha384(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void 
digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_SHA2_512) == 0ULL && total_len > 0ULL) @@ -1993,6 +1941,7 @@ static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_SHA2_512) == 0U && r > 0U) { @@ -2003,7 +1952,6 @@ static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou ite = r % block_len(Spec_Hash_Definitions_SHA2_512); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -2012,10 +1960,9 @@ static void digest_sha512(EverCrypt_Hash_Incremental_state_t *state, uint8_t *ou static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_Blake2S) == 0ULL && total_len > 0ULL) { @@ -2046,6 +1993,7 @@ static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o EverCrypt_Hash_state_s tmp_block_state 
= s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_Blake2S) == 0U && r > 0U) { @@ -2056,7 +2004,6 @@ static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o ite = r % block_len(Spec_Hash_Definitions_Blake2S); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -2065,10 +2012,9 @@ static void digest_blake2s(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o static void digest_blake2b(EverCrypt_Hash_Incremental_state_t *state, uint8_t *output) { - EverCrypt_Hash_Incremental_state_t scrut = *state; - EverCrypt_Hash_state_s *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + EverCrypt_Hash_state_s *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(Spec_Hash_Definitions_Blake2B) == 0ULL && total_len > 0ULL) { @@ -2099,6 +2045,7 @@ static void digest_blake2b(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o EverCrypt_Hash_state_s tmp_block_state = s; copy(block_state, &tmp_block_state); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(Spec_Hash_Definitions_Blake2B) == 0U && r > 0U) { @@ -2109,7 +2056,6 @@ static void digest_blake2b(EverCrypt_Hash_Incremental_state_t *state, uint8_t *o ite = r % block_len(Spec_Hash_Definitions_Blake2B); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; update_multi(&tmp_block_state, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; update_last(&tmp_block_state, prev_len_last, buf_last, r); @@ -2228,8 +2174,8 @@ void 
EverCrypt_Hash_Incremental_hash_256(uint8_t *output, uint8_t *input, uint32 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; uint32_t blocks_n0 = input_len / 64U; @@ -2266,8 +2212,8 @@ static void hash_224(uint8_t *output, uint8_t *input, uint32_t input_len) 0U, 8U, 1U, - uint32_t *os = st; uint32_t x = Hacl_Hash_SHA2_h224[i]; + uint32_t *os = st; os[i] = x;); uint32_t *s = st; uint32_t blocks_n0 = input_len / 64U; diff --git a/src/Hacl_AEAD_Chacha20Poly1305.c b/src/Hacl_AEAD_Chacha20Poly1305.c index d5926093..4b683308 100644 --- a/src/Hacl_AEAD_Chacha20Poly1305.c +++ b/src/Hacl_AEAD_Chacha20Poly1305.c @@ -579,7 +579,8 @@ Hacl_AEAD_Chacha20Poly1305_encrypt( { Hacl_Chacha20_chacha20_encrypt(input_len, output, input, key, nonce, 1U); uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_32(key1, data_len, data, input_len, output, tag); } @@ -618,7 +619,8 @@ Hacl_AEAD_Chacha20Poly1305_decrypt( { uint8_t computed_tag[16U] = { 0U }; uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_32(key1, data_len, data, input_len, input, computed_tag); uint8_t res = 255U; diff --git a/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c b/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c index 0cfa41fd..38494f80 100644 --- a/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c +++ b/src/Hacl_AEAD_Chacha20Poly1305_Simd128.c @@ -1095,7 +1095,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd128_encrypt( { Hacl_Chacha20_Vec128_chacha20_encrypt_128(input_len, output, input, key, nonce, 1U); uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] 
= { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_128(key1, data_len, data, input_len, output, tag); } @@ -1134,7 +1135,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd128_decrypt( { uint8_t computed_tag[16U] = { 0U }; uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_128(key1, data_len, data, input_len, input, computed_tag); uint8_t res = 255U; diff --git a/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c b/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c index 28414516..edf44f38 100644 --- a/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c +++ b/src/Hacl_AEAD_Chacha20Poly1305_Simd256.c @@ -1096,7 +1096,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd256_encrypt( { Hacl_Chacha20_Vec256_chacha20_encrypt_256(input_len, output, input, key, nonce, 1U); uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_256(key1, data_len, data, input_len, output, tag); } @@ -1135,7 +1136,8 @@ Hacl_AEAD_Chacha20Poly1305_Simd256_decrypt( { uint8_t computed_tag[16U] = { 0U }; uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp, key, nonce, 0U); + uint8_t tmp_copy[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256(64U, tmp, tmp_copy, key, nonce, 0U); uint8_t *key1 = tmp; poly1305_do_256(key1, data_len, data, input_len, input, computed_tag); uint8_t res = 255U; diff --git a/src/Hacl_Bignum.c b/src/Hacl_Bignum.c index 568bcc26..fcb722d6 100644 --- a/src/Hacl_Bignum.c +++ b/src/Hacl_Bignum.c @@ -54,8 +54,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); for 
(uint32_t i = 0U; i < len2; i++) { - uint32_t *os = t0; uint32_t x = ((0U - c0) & t0[i]) | (~(0U - c0) & tmp_[i]); + uint32_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c10); @@ -64,8 +64,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, b1, b0, t1); for (uint32_t i = 0U; i < len2; i++) { - uint32_t *os = t1; uint32_t x = ((0U - c010) & t1[i]) | (~(0U - c010) & tmp_[i]); + uint32_t *os = t1; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -77,6 +77,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a0, b0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(len2, a1, b1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint32_t *r011 = res; uint32_t *r231 = res + aLen; uint32_t *t01 = tmp; @@ -92,37 +94,47 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( uint32_t mask = 0U - c_sign; for (uint32_t i = 0U; i < aLen; i++) { - uint32_t *os = t45; uint32_t x = (mask & t45[i]) | (~mask & t67[i]); + uint32_t *os = t45; os[i] = x; } uint32_t c5 = (mask & c41) | (~mask & c31); uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r0 = res + aLen2; - uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); - uint32_t c6 = r10; + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint32_t)); + memcpy(a_copy, r0, aLen * sizeof (uint32_t)); + memcpy(b_copy, t45, aLen * sizeof (uint32_t)); + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, a_copy, b_copy, r0); + uint32_t r11 = r10; + uint32_t c6 = r11; uint32_t c60 = c6; uint32_t c7 = c5 + c60; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r = res + aLen + aLen2; uint32_t c01 = Lib_IntTypes_Intrinsics_add_carry_u32(0U, r[0U], c7, r); uint32_t r1; if (1U < aLen + 
aLen - (aLen + aLen2)) { - uint32_t *a11 = r + 1U; uint32_t *res1 = r + 1U; uint32_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint32_t t11 = a11[4U * i]; + uint32_t t11 = res1[4U * i]; uint32_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, 0U, res_i0); - uint32_t t110 = a11[4U * i + 1U]; + uint32_t t110 = res1[4U * i + 1U]; uint32_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t110, 0U, res_i1); - uint32_t t111 = a11[4U * i + 2U]; + uint32_t t111 = res1[4U * i + 2U]; uint32_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t111, 0U, res_i2); - uint32_t t112 = a11[4U * i + 3U]; + uint32_t t112 = res1[4U * i + 3U]; uint32_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t112, 0U, res_i); } @@ -133,7 +145,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint32_t t11 = a11[i]; + uint32_t t11 = res1[i]; uint32_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, 0U, res_i); } @@ -176,8 +188,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t c10 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); for (uint32_t i = 0U; i < len2; i++) { - uint64_t *os = t0; uint64_t x = ((0ULL - c0) & t0[i]) | (~(0ULL - c0) & tmp_[i]); + uint64_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c10); @@ -186,8 +198,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, b1, b0, t1); for (uint32_t i = 0U; i < len2; i++) { - uint64_t *os = t1; uint64_t x = ((0ULL - c010) & t1[i]) | (~(0ULL - c010) & tmp_[i]); + uint64_t *os = t1; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -199,6 +211,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a0, b0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(len2, a1, b1, tmp1, r23); 
+ KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint64_t *r011 = res; uint64_t *r231 = res + aLen; uint64_t *t01 = tmp; @@ -214,37 +228,47 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( uint64_t mask = 0ULL - c_sign; for (uint32_t i = 0U; i < aLen; i++) { - uint64_t *os = t45; uint64_t x = (mask & t45[i]) | (~mask & t67[i]); + uint64_t *os = t45; os[i] = x; } uint64_t c5 = (mask & c41) | (~mask & c31); uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r0 = res + aLen2; - uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); - uint64_t c6 = r10; + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint64_t)); + memcpy(a_copy, r0, aLen * sizeof (uint64_t)); + memcpy(b_copy, t45, aLen * sizeof (uint64_t)); + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, a_copy, b_copy, r0); + uint64_t r11 = r10; + uint64_t c6 = r11; uint64_t c60 = c6; uint64_t c7 = c5 + c60; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r = res + aLen + aLen2; uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64(0ULL, r[0U], c7, r); uint64_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint64_t *a11 = r + 1U; uint64_t *res1 = r + 1U; uint64_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint64_t t11 = a11[4U * i]; + uint64_t t11 = res1[4U * i]; uint64_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i0); - uint64_t t110 = a11[4U * i + 1U]; + uint64_t t110 = res1[4U * i + 1U]; uint64_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t110, 0ULL, res_i1); - uint64_t t111 = a11[4U * i + 2U]; + uint64_t t111 = res1[4U * i + 2U]; uint64_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t111, 0ULL, res_i2); - uint64_t t112 = a11[4U * i + 3U]; + 
uint64_t t112 = res1[4U * i + 3U]; uint64_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t112, 0ULL, res_i); } @@ -255,7 +279,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint64_t t11 = a11[i]; + uint64_t t11 = res1[i]; uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i); } @@ -294,8 +318,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( uint32_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(len2, a1, a0, t0); for (uint32_t i = 0U; i < len2; i++) { - uint32_t *os = t0; uint32_t x = ((0U - c0) & t0[i]) | (~(0U - c0) & tmp_[i]); + uint32_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -308,6 +332,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( uint32_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(len2, a1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint32_t *r011 = res; uint32_t *r231 = res + aLen; uint32_t *t01 = tmp; @@ -317,31 +343,41 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( uint32_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u32(aLen, t01, t231, t45); uint32_t c5 = c2 - c3; uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r0 = res + aLen2; - uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, r0, t45, r0); - uint32_t c4 = r10; + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), aLen); + uint32_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint32_t)); + memcpy(a_copy, r0, aLen * sizeof (uint32_t)); + memcpy(b_copy, t45, aLen * sizeof (uint32_t)); + uint32_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen, a_copy, b_copy, r0); + uint32_t r11 = r10; + uint32_t c4 = r11; uint32_t c6 = c4; uint32_t c7 = c5 + c6; + KRML_MAYBE_UNUSED_VAR(res); uint32_t *r = res + aLen + aLen2; uint32_t 
c01 = Lib_IntTypes_Intrinsics_add_carry_u32(0U, r[0U], c7, r); uint32_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint32_t *a11 = r + 1U; uint32_t *res1 = r + 1U; uint32_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint32_t t1 = a11[4U * i]; + uint32_t t1 = res1[4U * i]; uint32_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, 0U, res_i0); - uint32_t t10 = a11[4U * i + 1U]; + uint32_t t10 = res1[4U * i + 1U]; uint32_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, 0U, res_i1); - uint32_t t11 = a11[4U * i + 2U]; + uint32_t t11 = res1[4U * i + 2U]; uint32_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, 0U, res_i2); - uint32_t t12 = a11[4U * i + 3U]; + uint32_t t12 = res1[4U * i + 3U]; uint32_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, 0U, res_i); } @@ -352,7 +388,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint32_t t1 = a11[i]; + uint32_t t1 = res1[i]; uint32_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, 0U, res_i); } @@ -391,8 +427,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( uint64_t c1 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(len2, a1, a0, t0); for (uint32_t i = 0U; i < len2; i++) { - uint64_t *os = t0; uint64_t x = ((0ULL - c0) & t0[i]) | (~(0ULL - c0) & tmp_[i]); + uint64_t *os = t0; os[i] = x; } KRML_MAYBE_UNUSED_VAR(c1); @@ -405,6 +441,8 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( uint64_t *r23 = res + aLen; Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a0, tmp1, r01); Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(len2, a1, tmp1, r23); + KRML_MAYBE_UNUSED_VAR(res); + KRML_MAYBE_UNUSED_VAR(tmp); uint64_t *r011 = res; uint64_t *r231 = res + aLen; uint64_t *t01 = tmp; @@ -414,31 +452,41 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( uint64_t c3 = Hacl_Bignum_Addition_bn_sub_eq_len_u64(aLen, 
t01, t231, t45); uint64_t c5 = c2 - c3; uint32_t aLen2 = aLen / 2U; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r0 = res + aLen2; - uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, r0, t45, r0); - uint64_t c4 = r10; + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t a_copy[aLen]; + memset(a_copy, 0U, aLen * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), aLen); + uint64_t b_copy[aLen]; + memset(b_copy, 0U, aLen * sizeof (uint64_t)); + memcpy(a_copy, r0, aLen * sizeof (uint64_t)); + memcpy(b_copy, t45, aLen * sizeof (uint64_t)); + uint64_t r10 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen, a_copy, b_copy, r0); + uint64_t r11 = r10; + uint64_t c4 = r11; uint64_t c6 = c4; uint64_t c7 = c5 + c6; + KRML_MAYBE_UNUSED_VAR(res); uint64_t *r = res + aLen + aLen2; uint64_t c01 = Lib_IntTypes_Intrinsics_add_carry_u64(0ULL, r[0U], c7, r); uint64_t r1; if (1U < aLen + aLen - (aLen + aLen2)) { - uint64_t *a11 = r + 1U; uint64_t *res1 = r + 1U; uint64_t c = c01; for (uint32_t i = 0U; i < (aLen + aLen - (aLen + aLen2) - 1U) / 4U; i++) { - uint64_t t1 = a11[4U * i]; + uint64_t t1 = res1[4U * i]; uint64_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i0); - uint64_t t10 = a11[4U * i + 1U]; + uint64_t t10 = res1[4U * i + 1U]; uint64_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, 0ULL, res_i1); - uint64_t t11 = a11[4U * i + 2U]; + uint64_t t11 = res1[4U * i + 2U]; uint64_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i2); - uint64_t t12 = a11[4U * i + 3U]; + uint64_t t12 = res1[4U * i + 3U]; uint64_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, 0ULL, res_i); } @@ -449,7 +497,7 @@ Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( < aLen + aLen - (aLen + aLen2) - 1U; i++) { - uint64_t t1 = a11[i]; + uint64_t t1 = res1[i]; uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i); } @@ 
-537,8 +585,8 @@ Hacl_Bignum_bn_add_mod_n_u32( uint32_t c2 = c00 - c1; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -614,8 +662,8 @@ Hacl_Bignum_bn_add_mod_n_u64( uint64_t c2 = c00 - c1; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -692,8 +740,8 @@ Hacl_Bignum_bn_sub_mod_n_u32( uint32_t c2 = 0U - c00; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = res; uint32_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -770,8 +818,8 @@ Hacl_Bignum_bn_sub_mod_n_u64( uint64_t c2 = 0ULL - c00; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -832,7 +880,7 @@ uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m1 = acc; return m0 & m1; @@ -852,7 +900,15 @@ Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( res[i] = res[i] | 1U << j; for (uint32_t i0 = 0U; i0 < 64U * len - nBits; i0++) { - Hacl_Bignum_bn_add_mod_n_u32(len, n, res, res, res); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint32_t)); + memcpy(a_copy, res, len * sizeof (uint32_t)); + memcpy(b_copy, res, len * sizeof (uint32_t)); + Hacl_Bignum_bn_add_mod_n_u32(len, n, a_copy, b_copy, res); } } @@ -888,8 +944,8 @@ bn_mont_reduction_u32(uint32_t len, uint32_t *n, uint32_t nInv, uint32_t *c, uin } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + len + i0; uint32_t res_j = 
c[len + i0]; + uint32_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); @@ -928,8 +984,8 @@ bn_mont_reduction_u32(uint32_t len, uint32_t *n, uint32_t nInv, uint32_t *c, uin uint32_t c2 = c00 - c10; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -1023,7 +1079,7 @@ uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; return m0 & m1; @@ -1043,7 +1099,15 @@ Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( res[i] = res[i] | 1ULL << j; for (uint32_t i0 = 0U; i0 < 128U * len - nBits; i0++) { - Hacl_Bignum_bn_add_mod_n_u64(len, n, res, res, res); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint64_t)); + memcpy(a_copy, res, len * sizeof (uint64_t)); + memcpy(b_copy, res, len * sizeof (uint64_t)); + Hacl_Bignum_bn_add_mod_n_u64(len, n, a_copy, b_copy, res); } } @@ -1079,8 +1143,8 @@ bn_mont_reduction_u64(uint32_t len, uint64_t *n, uint64_t nInv, uint64_t *c, uin } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + len + i0; uint64_t res_j = c[len + i0]; + uint64_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); @@ -1119,8 +1183,8 @@ bn_mont_reduction_u64(uint32_t len, uint64_t *n, uint64_t nInv, uint64_t *c, uin uint64_t c2 = c00 - c10; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & 
tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -1238,8 +1302,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u32( } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + len + i0; uint32_t res_j = c[len + i0]; + uint32_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint32_t)); @@ -1252,8 +1316,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u32( uint32_t m = 0U - c00; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = res; uint32_t x = (m & tmp[i]) | (~m & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -1335,8 +1399,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u64( } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + len + i0; uint64_t res_j = c[len + i0]; + uint64_t *resb = c + len + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + len, (len + len - len) * sizeof (uint64_t)); @@ -1349,8 +1413,8 @@ Hacl_Bignum_AlmostMontgomery_bn_almost_mont_reduction_u64( uint64_t m = 0ULL - c00; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = res; uint64_t x = (m & tmp[i]) | (~m & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -1415,7 +1479,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m10 = acc0; uint32_t m00 = m0 & m10; @@ -1442,7 +1506,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( { uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t res = acc; m1 = res; @@ -1456,7 +1520,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t 
blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t m = m1 & m2; @@ -1489,9 +1553,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( memset(ctx, 0U, (len + len) * sizeof (uint32_t)); memcpy(ctx, n, len * sizeof (uint32_t)); memcpy(ctx + len, r2, len * sizeof (uint32_t)); - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -1500,11 +1565,21 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( uint32_t bit = tmp >> j & 1U; if (!(bit == 0U)) { - uint32_t *ctx_n0 = ctx; - bn_almost_mont_mul_u32(len, ctx_n0, mu, resM, aM, resM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint32_t *ctx_n0 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n0, mu, aM, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u32(len, n, mu, resM, res); return; @@ -1541,18 +1616,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 
1U, uint32_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy0, t11, len * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u32(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -1567,6 +1654,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint32_t), len); uint32_t tmp0[len]; @@ -1577,15 +1665,26 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_sqr_u32(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * len; memcpy(tmp0, (uint32_t *)a_bits_l, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + 
memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u32(len, n, mu, resM, res); } @@ -1617,9 +1716,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( memcpy(ctx, n, len * sizeof (uint32_t)); memcpy(ctx + len, r2, len * sizeof (uint32_t)); uint32_t sw = 0U; - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -1633,10 +1733,20 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint32_t *ctx_n0 = ctx; - bn_almost_mont_mul_u32(len, ctx_n0, mu, aM, resM, aM); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n1, mu, resM, resM); + bn_almost_mont_mul_u32(len, ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy0, resM, len * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint32_t sw0 = sw; @@ -1681,18 +1791,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len 
* sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy0, t11, len * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u32(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u32(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, aM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -1707,8 +1829,8 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( const uint32_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x; }); } @@ -1717,6 +1839,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u32(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint32_t), len); uint32_t tmp0[len]; @@ -1727,10 +1850,16 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_sqr_u32(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u32(len, ctx_n, mu, aM_copy, resM); + 
KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)(table + 0U * len), len * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -1740,12 +1869,17 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( const uint32_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x; }); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint32_t)); + memcpy(aM_copy, resM, len * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - bn_almost_mont_mul_u32(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u32(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u32(len, n, mu, resM, res); } @@ -1809,7 +1943,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m10 = acc0; uint64_t m00 = m0 & m10; @@ -1836,7 +1970,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; m1 = res; @@ -1850,7 +1984,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t m = m1 & m2; @@ -1883,9 +2017,10 @@ 
Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( memset(ctx, 0U, (len + len) * sizeof (uint64_t)); memcpy(ctx, n, len * sizeof (uint64_t)); memcpy(ctx + len, r2, len * sizeof (uint64_t)); - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -1894,11 +2029,21 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( uint64_t bit = tmp >> j & 1ULL; if (!(bit == 0ULL)) { - uint64_t *ctx_n0 = ctx; - bn_almost_mont_mul_u64(len, ctx_n0, mu, resM, aM, resM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint64_t *ctx_n0 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n0, mu, aM, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u64(len, n, mu, resM, res); return; @@ -1935,18 +2080,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy0, t11, len * sizeof 
(uint64_t)); uint64_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u64(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -1961,6 +2118,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint64_t), len); uint64_t tmp0[len]; @@ -1971,15 +2129,26 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_sqr_u64(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * len; memcpy(tmp0, (uint64_t *)a_bits_l, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, resM, tmp0, resM); + 
bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u64(len, n, mu, resM, res); } @@ -2011,9 +2180,10 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( memcpy(ctx, n, len * sizeof (uint64_t)); memcpy(ctx + len, r2, len * sizeof (uint64_t)); uint64_t sw = 0ULL; - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + len; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -2027,10 +2197,20 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint64_t *ctx_n0 = ctx; - bn_almost_mont_mul_u64(len, ctx_n0, mu, aM, resM, aM); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n1, mu, resM, resM); + bn_almost_mont_mul_u64(len, ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy0, resM, len * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint64_t sw0 = sw; @@ -2075,18 +2255,30 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, len * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t 
aM_copy0[len]; + memset(aM_copy0, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy0, t11, len * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - bn_almost_mont_sqr_u64(len, ctx_n1, mu, t11, tmp); + bn_almost_mont_sqr_u64(len, ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * len, tmp, len * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len; + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, aM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, aM, t2, tmp); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * len, tmp, len * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -2101,8 +2293,8 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( const uint64_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x; }); } @@ -2111,6 +2303,7 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len; Hacl_Bignum_Montgomery_bn_from_mont_u64(len, ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } KRML_CHECK_SIZE(sizeof (uint64_t), len); uint64_t tmp0[len]; @@ -2121,10 +2314,16 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( 0U, 4U, 1U, + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_sqr_u64(len, ctx_n, mu, resM, resM);); + bn_almost_mont_sqr_u64(len, ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t 
*)(table + 0U * len), len * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -2134,12 +2333,17 @@ Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( const uint64_t *res_j = table + (i1 + 1U) * len; for (uint32_t i = 0U; i < len; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t aM_copy[len]; + memset(aM_copy, 0U, len * sizeof (uint64_t)); + memcpy(aM_copy, resM, len * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - bn_almost_mont_mul_u64(len, ctx_n, mu, resM, tmp0, resM); + bn_almost_mont_mul_u64(len, ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } Hacl_Bignum_Montgomery_bn_from_mont_u64(len, n, mu, resM, res); } diff --git a/src/Hacl_Bignum256.c b/src/Hacl_Bignum256.c index 54bbc88a..d1a118c1 100644 --- a/src/Hacl_Bignum256.c +++ b/src/Hacl_Bignum256.c @@ -171,8 +171,8 @@ void Hacl_Bignum256_add_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x;); } @@ -235,8 +235,8 @@ void Hacl_Bignum256_sub_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x;); } @@ -287,8 +287,8 @@ void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) 0U, 4U, 1U, - uint64_t *ab = a; uint64_t a_j = a[i0]; + uint64_t *ab = a; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -314,7 +314,12 @@ void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) } uint64_t r = c; res[i0 + i0] = r;); - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, res, res); + uint64_t a_copy0[8U] = { 0U }; + uint64_t b_copy0[8U] = { 0U }; + memcpy(a_copy0, res, 8U * sizeof (uint64_t)); + memcpy(b_copy0, res, 8U * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy0, b_copy0, 
res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint64_t tmp[8U] = { 0U }; KRML_MAYBE_FOR4(i, @@ -326,7 +331,12 @@ void Hacl_Bignum256_sqr(uint64_t *a, uint64_t *res) uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, tmp, res); + uint64_t a_copy[8U] = { 0U }; + uint64_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint64_t)); + memcpy(b_copy, tmp, 8U * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -338,7 +348,11 @@ static inline void precompr2(uint32_t nBits, uint64_t *n, uint64_t *res) res[i] = res[i] | 1ULL << j; for (uint32_t i0 = 0U; i0 < 512U - nBits; i0++) { - Hacl_Bignum256_add_mod(n, res, res, res); + uint64_t a_copy[4U] = { 0U }; + uint64_t b_copy[4U] = { 0U }; + memcpy(a_copy, res, 4U * sizeof (uint64_t)); + memcpy(b_copy, res, 4U * sizeof (uint64_t)); + Hacl_Bignum256_add_mod(n, a_copy, b_copy, res); } } @@ -368,8 +382,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 4U + i0; uint64_t res_j = c[4U + i0]; + uint64_t *resb = c + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb);); memcpy(res, c + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = c0; @@ -399,8 +413,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x;); } @@ -444,8 +458,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t } uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 4U + i0; uint64_t res_j = c[4U + i0]; + uint64_t *resb = c + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb);); memcpy(res, c + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = 
c0; @@ -457,8 +471,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (m & tmp[i]) | (~m & res[i]); + uint64_t *os = res; os[i] = x;); } @@ -512,7 +526,7 @@ bool Hacl_Bignum256_mod(uint64_t *n, uint64_t *a, uint64_t *res) 1U, uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t m1 = acc; uint64_t is_valid_m = m0 & m1; uint32_t nBits = 64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64(4U, n); @@ -544,7 +558,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc0 = (beq & acc0) | (~beq & blt);); uint64_t m10 = acc0; uint64_t m00 = m0 & m10; uint32_t bLen; @@ -570,7 +584,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; m1 = res; @@ -586,7 +600,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t m2 = acc; uint64_t m = m1 & m2; return m00 & m; @@ -611,9 +625,10 @@ exp_vartime_precomp( uint64_t ctx[8U] = { 0U }; memcpy(ctx, n, 4U * sizeof (uint64_t)); memcpy(ctx + 4U, r2, 4U * sizeof (uint64_t)); - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; 
uint64_t *ctx_r2 = ctx + 4U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -622,11 +637,17 @@ exp_vartime_precomp( uint64_t bit = tmp >> j & 1ULL; if (!(bit == 0ULL)) { - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint64_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -653,18 +674,26 @@ exp_vartime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 4U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 4U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 4U; + uint64_t aM_copy0[4U] = { 0U }; + memcpy(aM_copy0, t11, 4U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 4U, tmp, 4U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 4U; + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 4U, tmp, 4U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -679,6 +708,7 @@ exp_vartime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 4U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[4U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -687,15 +717,22 @@ exp_vartime_precomp( 0U, 
4U, 1U, + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * 4U; memcpy(tmp0, (uint64_t *)a_bits_l, 4U * sizeof (uint64_t)); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -720,9 +757,10 @@ exp_consttime_precomp( memcpy(ctx, n, 4U * sizeof (uint64_t)); memcpy(ctx + 4U, r2, 4U * sizeof (uint64_t)); uint64_t sw = 0ULL; - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 4U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -737,10 +775,16 @@ exp_consttime_precomp( uint64_t dummy = (0ULL - sw1) & (resM[i] ^ aM[i]); resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy;); - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint64_t aM_copy0[4U] = { 0U }; + memcpy(aM_copy0, resM, 4U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint64_t sw0 = sw; @@ -776,18 +820,26 @@ exp_consttime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 4U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 4U * sizeof 
(uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 4U; + uint64_t aM_copy0[4U] = { 0U }; + memcpy(aM_copy0, t11, 4U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 4U, tmp, 4U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 4U; + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, aM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 4U, tmp, 4U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -804,8 +856,8 @@ exp_consttime_precomp( 0U, 4U, 1U, - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x;);); } else @@ -813,6 +865,7 @@ exp_consttime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 4U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[4U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -821,10 +874,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 4U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -836,11 +893,14 @@ exp_consttime_precomp( 0U, 4U, 1U, - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x;);); + uint64_t aM_copy[4U] = { 0U }; + memcpy(aM_copy, resM, 4U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + 
KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -990,7 +1050,7 @@ bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *re 1U, uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc0 = (beq & acc0) | (~beq & blt);); uint64_t m1 = acc0; uint64_t m00 = m0 & m1; uint64_t bn_zero[4U] = { 0U }; @@ -1011,7 +1071,7 @@ bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *re 1U, uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t m2 = acc; uint64_t is_valid_m = (m00 & ~m10) & m2; uint32_t nBits = 64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64(4U, n); @@ -1087,9 +1147,9 @@ Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. 
*/ void Hacl_Bignum256_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1109,8 +1169,10 @@ Hacl_Bignum256_mod_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1141,8 +1203,10 @@ Hacl_Bignum256_mod_exp_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1173,8 +1237,10 @@ Hacl_Bignum256_mod_exp_consttime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1196,10 +1262,12 @@ Hacl_Bignum256_mod_inv_prime_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; uint64_t n2[4U] = { 0U }; - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); - uint64_t *a1 = k1.n + 1U; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, n[0U], 2ULL, n2); + uint64_t *a1 = n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; KRML_MAYBE_FOR3(i, @@ -1212,7 +1280,7 @@ Hacl_Bignum256_mod_inv_prime_vartime_precomp( uint64_t c1 = c; uint64_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, 
k1.r2, a, 256U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 256U, n2, res); } @@ -1254,9 +1322,9 @@ uint64_t *Hacl_Bignum256_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res2; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res2; os[i] = x; } return res2; @@ -1295,11 +1363,11 @@ uint64_t *Hacl_Bignum256_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 8U + 1U; i++) { - uint64_t *os = res2; uint8_t *bj = tmp + i * 8U; uint64_t u = load64_le(bj); uint64_t r1 = u; uint64_t x = r1; + uint64_t *os = res2; os[i] = x; } return res2; @@ -1351,7 +1419,7 @@ uint64_t Hacl_Bignum256_lt_mask(uint64_t *a, uint64_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); return acc; } diff --git a/src/Hacl_Bignum256_32.c b/src/Hacl_Bignum256_32.c index eed6c65c..b734d073 100644 --- a/src/Hacl_Bignum256_32.c +++ b/src/Hacl_Bignum256_32.c @@ -179,8 +179,8 @@ void Hacl_Bignum256_32_add_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t * 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x;); } @@ -247,8 +247,8 @@ void Hacl_Bignum256_32_sub_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t * 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint32_t *os = res; os[i] = x;); } @@ -301,8 +301,8 @@ void Hacl_Bignum256_32_sqr(uint32_t *a, uint32_t *res) 0U, 8U, 1U, - uint32_t *ab = a; uint32_t a_j = a[i0]; + uint32_t *ab = a; uint32_t *res_j = res + i0; uint32_t c = 0U; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -328,7 +328,12 @@ void Hacl_Bignum256_32_sqr(uint32_t *a, uint32_t *res) } uint32_t r = c; 
res[i0 + i0] = r;); - uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, res, res, res); + uint32_t a_copy0[16U] = { 0U }; + uint32_t b_copy0[16U] = { 0U }; + memcpy(a_copy0, res, 16U * sizeof (uint32_t)); + memcpy(b_copy0, res, 16U * sizeof (uint32_t)); + uint32_t r = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, a_copy0, b_copy0, res); + uint32_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint32_t tmp[16U] = { 0U }; KRML_MAYBE_FOR8(i, @@ -340,7 +345,12 @@ void Hacl_Bignum256_32_sqr(uint32_t *a, uint32_t *res) uint32_t lo = (uint32_t)res1; tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, res, tmp, res); + uint32_t a_copy[16U] = { 0U }; + uint32_t b_copy[16U] = { 0U }; + memcpy(a_copy, res, 16U * sizeof (uint32_t)); + memcpy(b_copy, tmp, 16U * sizeof (uint32_t)); + uint32_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(16U, a_copy, b_copy, res); + uint32_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -352,7 +362,11 @@ static inline void precompr2(uint32_t nBits, uint32_t *n, uint32_t *res) res[i] = res[i] | 1U << j; for (uint32_t i0 = 0U; i0 < 512U - nBits; i0++) { - Hacl_Bignum256_32_add_mod(n, res, res, res); + uint32_t a_copy[8U] = { 0U }; + uint32_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint32_t)); + memcpy(b_copy, res, 8U * sizeof (uint32_t)); + Hacl_Bignum256_32_add_mod(n, a_copy, b_copy, res); } } @@ -384,8 +398,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i);); uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 8U + i0; uint32_t res_j = c[8U + i0]; + uint32_t *resb = c + 8U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb);); memcpy(res, c + 8U, 8U * sizeof (uint32_t)); uint32_t c00 = c0; @@ -417,8 +431,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + 
uint32_t *os = res; os[i] = x;); } @@ -464,8 +478,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t c1 = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, qj, c1, res_i);); uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 8U + i0; uint32_t res_j = c[8U + i0]; + uint32_t *resb = c + 8U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb);); memcpy(res, c + 8U, 8U * sizeof (uint32_t)); uint32_t c00 = c0; @@ -477,8 +491,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t 0U, 8U, 1U, - uint32_t *os = res; uint32_t x = (m & tmp[i]) | (~m & res[i]); + uint32_t *os = res; os[i] = x;); } @@ -532,7 +546,7 @@ bool Hacl_Bignum256_32_mod(uint32_t *n, uint32_t *a, uint32_t *res) 1U, uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); uint32_t m1 = acc; uint32_t is_valid_m = m0 & m1; uint32_t nBits = 32U * Hacl_Bignum_Lib_bn_get_top_index_u32(8U, n); @@ -564,7 +578,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) 1U, uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc0 = (beq & acc0) | (~beq & blt);); uint32_t m10 = acc0; uint32_t m00 = m0 & m10; uint32_t bLen; @@ -590,7 +604,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t res = acc; m1 = res; @@ -606,7 +620,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) 1U, uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = 
~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); uint32_t m2 = acc; uint32_t m = m1 & m2; return m00 & m; @@ -631,9 +645,10 @@ exp_vartime_precomp( uint32_t ctx[16U] = { 0U }; memcpy(ctx, n, 8U * sizeof (uint32_t)); memcpy(ctx + 8U, r2, 8U * sizeof (uint32_t)); - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 8U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -642,11 +657,17 @@ exp_vartime_precomp( uint32_t bit = tmp >> j & 1U; if (!(bit == 0U)) { - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint32_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -673,18 +694,26 @@ exp_vartime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 8U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 8U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 8U; + uint32_t aM_copy0[8U] = { 0U }; + memcpy(aM_copy0, t11, 8U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 8U, tmp, 8U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 8U; + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, 
tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 8U, tmp, 8U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -699,6 +728,7 @@ exp_vartime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 8U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[8U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -707,15 +737,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * 8U; memcpy(tmp0, (uint32_t *)a_bits_l, 8U * sizeof (uint32_t)); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -740,9 +777,10 @@ exp_consttime_precomp( memcpy(ctx, n, 8U * sizeof (uint32_t)); memcpy(ctx + 8U, r2, 8U * sizeof (uint32_t)); uint32_t sw = 0U; - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 8U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -757,10 +795,16 @@ exp_consttime_precomp( uint32_t dummy = (0U - sw1) & (resM[i] ^ aM[i]); resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy;); - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + 
uint32_t aM_copy0[8U] = { 0U }; + memcpy(aM_copy0, resM, 8U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint32_t sw0 = sw; @@ -796,18 +840,26 @@ exp_consttime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 8U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 8U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 8U; + uint32_t aM_copy0[8U] = { 0U }; + memcpy(aM_copy0, t11, 8U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 8U, tmp, 8U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 8U; + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, aM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 8U, tmp, 8U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -824,8 +876,8 @@ exp_consttime_precomp( 0U, 8U, 1U, - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x;);); } else @@ -833,6 +885,7 @@ exp_consttime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 8U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[8U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -841,10 +894,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)table, 8U * sizeof 
(uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -856,11 +913,14 @@ exp_consttime_precomp( 0U, 8U, 1U, - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x;);); + uint32_t aM_copy[8U] = { 0U }; + memcpy(aM_copy, resM, 8U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -1010,7 +1070,7 @@ bool Hacl_Bignum256_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t 1U, uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc0 = (beq & acc0) | (~beq & blt);); uint32_t m1 = acc0; uint32_t m00 = m0 & m1; uint32_t bn_zero[8U] = { 0U }; @@ -1031,7 +1091,7 @@ bool Hacl_Bignum256_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t 1U, uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); uint32_t m2 = acc; uint32_t is_valid_m = (m00 & ~m10) & m2; uint32_t nBits = 32U * Hacl_Bignum_Lib_bn_get_top_index_u32(8U, n); @@ -1121,9 +1181,9 @@ Deallocate the memory previously allocated by Hacl_Bignum256_mont_ctx_init. 
*/ void Hacl_Bignum256_32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1143,8 +1203,10 @@ Hacl_Bignum256_32_mod_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1175,8 +1237,10 @@ Hacl_Bignum256_32_mod_exp_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1207,8 +1271,10 @@ Hacl_Bignum256_32_mod_exp_consttime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1230,10 +1296,12 @@ Hacl_Bignum256_32_mod_inv_prime_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; uint32_t n2[8U] = { 0U }; - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); - uint32_t *a1 = k1.n + 1U; + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, n[0U], 2U, n2); + uint32_t *a1 = n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; { @@ -1260,7 +1328,7 @@ Hacl_Bignum256_32_mod_inv_prime_vartime_precomp( uint32_t c1 = c; uint32_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 
256U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 256U, n2, res); } @@ -1302,9 +1370,9 @@ uint32_t *Hacl_Bignum256_32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint32_t *os = res2; uint32_t u = load32_be(tmp + (bnLen - i - 1U) * 4U); uint32_t x = u; + uint32_t *os = res2; os[i] = x; } return res2; @@ -1343,11 +1411,11 @@ uint32_t *Hacl_Bignum256_32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 4U + 1U; i++) { - uint32_t *os = res2; uint8_t *bj = tmp + i * 4U; uint32_t u = load32_le(bj); uint32_t r1 = u; uint32_t x = r1; + uint32_t *os = res2; os[i] = x; } return res2; @@ -1399,7 +1467,7 @@ uint32_t Hacl_Bignum256_32_lt_mask(uint32_t *a, uint32_t *b) 1U, uint32_t beq = FStar_UInt32_eq_mask(a[i], b[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); return acc; } diff --git a/src/Hacl_Bignum32.c b/src/Hacl_Bignum32.c index 34b46324..4a371276 100644 --- a/src/Hacl_Bignum32.c +++ b/src/Hacl_Bignum32.c @@ -46,9 +46,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. 
+ @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -60,7 +69,16 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -70,27 +88,57 @@ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. 
May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { - Hacl_Bignum_bn_add_mod_n_u32(len, n, a, b, res); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len); + uint32_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint32_t)); + memcpy(a_copy, a, len * sizeof (uint32_t)); + memcpy(b_copy, b, len * sizeof (uint32_t)); + Hacl_Bignum_bn_add_mod_n_u32(len, n, a_copy, b_copy, res); } /** Write `(a - b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. 
+ @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -100,8 +148,13 @@ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -114,8 +167,10 @@ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res) { @@ -149,13 +204,19 @@ bn_slow_precomp( /** Write `a mod n` in `res`. 
- The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -171,7 +232,7 @@ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m1 = acc; uint32_t is_valid_m = m0 & m1; @@ -195,22 +256,30 @@ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. 
See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -238,22 +307,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. 
- - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -281,18 +358,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. 
- • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -308,7 +390,7 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m1 = acc0; uint32_t m00 = m0 & m1; @@ -329,7 +411,7 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t is_valid_m = (m00 & ~m10) & m2; @@ -393,15 +475,16 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - Before calling this function, the caller will need to ensure that the following - preconditions are observed. 
- • n % 2 = 1 - • 1 < n - - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n) @@ -429,13 +512,13 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -444,9 +527,11 @@ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. 
*/ void Hacl_Bignum32_mod_precomp( @@ -455,30 +540,35 @@ Hacl_Bignum32_mod_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - bn_slow_precomp(len1, k1.n, k1.mu, k1.r2, a, res); + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + bn_slow_precomp(len1, n, mu, r2, a, res); } /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. 
if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -489,37 +579,35 @@ Hacl_Bignum32_mod_exp_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len1, n, mu, r2, a, bBits, b, res); } /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. 
+ @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -530,30 +618,27 @@ Hacl_Bignum32_mod_exp_consttime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(len1, n, mu, r2, a, bBits, b, res); } /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -562,17 +647,18 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; + uint32_t len1 = (*k).len; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint32_t)); - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, n[0U], 2U, n2); uint32_t c1; if (1U < len1) { - uint32_t *a1 = k1.n + 1U; + uint32_t *a1 = n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -605,9 +691,9 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( } KRML_MAYBE_UNUSED_VAR(c1); Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(len1, - k1.n, - k1.mu, - k1.r2, + n, + mu, + r2, a, 32U * len1, n2, @@ -623,13 +709,13 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. 
Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) { @@ -653,9 +739,9 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint32_t *os = res2; uint32_t u = load32_be(tmp + (bnLen - i - 1U) * 4U); uint32_t x = u; + uint32_t *os = res2; os[i] = x; } return res2; @@ -664,13 +750,13 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. 
*/ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) { @@ -694,11 +780,11 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 4U + 1U; i++) { - uint32_t *os = res2; uint8_t *bj = tmp + i * 4U; uint32_t u = load32_le(bj); uint32_t r1 = u; uint32_t x = r1; + uint32_t *os = res2; os[i] = x; } return res2; @@ -707,8 +793,11 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) { @@ -727,8 +816,11 @@ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) { @@ -753,7 +845,11 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. 
+ @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) { @@ -762,7 +858,7 @@ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(a[i], b[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } return acc; } @@ -770,7 +866,11 @@ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a = b`, otherwise, `0`. */ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b) { diff --git a/src/Hacl_Bignum4096.c b/src/Hacl_Bignum4096.c index 3572db07..c9ac9573 100644 --- a/src/Hacl_Bignum4096.c +++ b/src/Hacl_Bignum4096.c @@ -180,8 +180,8 @@ void Hacl_Bignum4096_add_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *re uint64_t c2 = c00 - c1; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -247,8 +247,8 @@ void Hacl_Bignum4096_sub_mod(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *re uint64_t c2 = 0ULL - c00; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -285,7 +285,11 @@ static inline void precompr2(uint32_t nBits, uint64_t *n, uint64_t *res) res[i] = res[i] | 1ULL << j; for (uint32_t i0 = 0U; i0 < 8192U - nBits; i0++) { - Hacl_Bignum4096_add_mod(n, res, res, res); + uint64_t a_copy[64U] = { 0U }; + uint64_t b_copy[64U] = { 0U }; + 
memcpy(a_copy, res, 64U * sizeof (uint64_t)); + memcpy(b_copy, res, 64U * sizeof (uint64_t)); + Hacl_Bignum4096_add_mod(n, a_copy, b_copy, res); } } @@ -315,8 +319,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i);); uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 64U + i0; uint64_t res_j = c[64U + i0]; + uint64_t *resb = c + 64U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + 64U, 64U * sizeof (uint64_t)); @@ -347,8 +351,8 @@ static inline void reduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t * uint64_t c2 = c00 - c10; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x; } } @@ -393,8 +397,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c1, res_i);); uint64_t r = c1; uint64_t c10 = r; - uint64_t *resb = c + 64U + i0; uint64_t res_j = c[64U + i0]; + uint64_t *resb = c + 64U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c10, res_j, resb); } memcpy(res, c + 64U, 64U * sizeof (uint64_t)); @@ -405,8 +409,8 @@ static inline void areduction(uint64_t *n, uint64_t nInv, uint64_t *c, uint64_t uint64_t m = 0ULL - c00; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = res; uint64_t x = (m & tmp[i]) | (~m & res[i]); + uint64_t *os = res; os[i] = x; } } @@ -459,7 +463,7 @@ bool Hacl_Bignum4096_mod(uint64_t *n, uint64_t *a, uint64_t *res) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; uint64_t is_valid_m = m0 & m1; @@ -490,7 +494,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = 
FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m10 = acc0; uint64_t m00 = m0 & m10; @@ -517,7 +521,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; m1 = res; @@ -531,7 +535,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t m = m1 & m2; @@ -557,9 +561,10 @@ exp_vartime_precomp( uint64_t ctx[128U] = { 0U }; memcpy(ctx, n, 64U * sizeof (uint64_t)); memcpy(ctx + 64U, r2, 64U * sizeof (uint64_t)); - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 64U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -568,11 +573,17 @@ exp_vartime_precomp( uint64_t bit = tmp >> j & 1ULL; if (!(bit == 0ULL)) { - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint64_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -599,18 
+610,26 @@ exp_vartime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 64U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 64U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 64U; + uint64_t aM_copy0[64U] = { 0U }; + memcpy(aM_copy0, t11, 64U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 64U, tmp, 64U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 64U; + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 64U, tmp, 64U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -625,6 +644,7 @@ exp_vartime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 64U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[64U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -633,15 +653,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + bits_l32 * 64U; memcpy(tmp0, (uint64_t *)a_bits_l, 64U * sizeof (uint64_t)); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -666,9 +693,10 @@ 
exp_consttime_precomp( memcpy(ctx, n, 64U * sizeof (uint64_t)); memcpy(ctx + 64U, r2, 64U * sizeof (uint64_t)); uint64_t sw = 0ULL; - uint64_t *ctx_n = ctx; + uint64_t *ctx_n0 = ctx; uint64_t *ctx_r2 = ctx + 64U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -682,10 +710,16 @@ exp_consttime_precomp( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint64_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint64_t aM_copy0[64U] = { 0U }; + memcpy(aM_copy0, resM, 64U * sizeof (uint64_t)); + uint64_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint64_t sw0 = sw; @@ -720,18 +754,26 @@ exp_consttime_precomp( uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + 64U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 64U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 64U; + uint64_t aM_copy0[64U] = { 0U }; + memcpy(aM_copy0, t11, 64U * sizeof (uint64_t)); uint64_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 64U, tmp, 64U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 64U; + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, aM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 64U, tmp, 64U * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -746,8 +788,8 @@ exp_consttime_precomp( const uint64_t *res_j = table + (i1 
+ 1U) * 64U; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x; }); } @@ -756,6 +798,7 @@ exp_consttime_precomp( uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + 64U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint64_t tmp0[64U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -764,10 +807,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 64U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -777,12 +824,15 @@ exp_consttime_precomp( const uint64_t *res_j = table + (i1 + 1U) * 64U; for (uint32_t i = 0U; i < 64U; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); + uint64_t aM_copy[64U] = { 0U }; + memcpy(aM_copy, resM, 64U * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -930,7 +980,7 @@ bool Hacl_Bignum4096_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *r { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m1 = acc0; uint64_t m00 = m0 & m1; @@ -949,7 +999,7 @@ bool Hacl_Bignum4096_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *r { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt 
& 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t is_valid_m = (m00 & ~m10) & m2; @@ -1042,9 +1092,9 @@ Deallocate the memory previously allocated by Hacl_Bignum4096_mont_ctx_init. */ void Hacl_Bignum4096_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1064,8 +1114,10 @@ Hacl_Bignum4096_mod_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1096,8 +1148,10 @@ Hacl_Bignum4096_mod_exp_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1128,8 +1182,10 @@ Hacl_Bignum4096_mod_exp_consttime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1151,10 +1207,12 @@ Hacl_Bignum4096_mod_inv_prime_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; uint64_t n2[64U] = { 0U }; - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); - uint64_t *a1 = k1.n + 1U; + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, n[0U], 2ULL, n2); + 
uint64_t *a1 = n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; KRML_MAYBE_FOR15(i, @@ -1183,7 +1241,7 @@ Hacl_Bignum4096_mod_inv_prime_vartime_precomp( uint64_t c1 = c; uint64_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 4096U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 4096U, n2, res); } @@ -1225,9 +1283,9 @@ uint64_t *Hacl_Bignum4096_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res2; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res2; os[i] = x; } return res2; @@ -1266,11 +1324,11 @@ uint64_t *Hacl_Bignum4096_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 8U + 1U; i++) { - uint64_t *os = res2; uint8_t *bj = tmp + i * 8U; uint64_t u = load64_le(bj); uint64_t r1 = u; uint64_t x = r1; + uint64_t *os = res2; os[i] = x; } return res2; @@ -1326,7 +1384,7 @@ uint64_t Hacl_Bignum4096_lt_mask(uint64_t *a, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } return acc; } diff --git a/src/Hacl_Bignum4096_32.c b/src/Hacl_Bignum4096_32.c index 1a8b361c..3b36fbdc 100644 --- a/src/Hacl_Bignum4096_32.c +++ b/src/Hacl_Bignum4096_32.c @@ -177,8 +177,8 @@ void Hacl_Bignum4096_32_add_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t uint32_t c2 = c00 - c1; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -242,8 +242,8 @@ void Hacl_Bignum4096_32_sub_mod(uint32_t *n, uint32_t *a, uint32_t *b, uint32_t uint32_t c2 = 0U - c00; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (c2 & tmp[i]) | (~c2 & res[i]); + 
uint32_t *os = res; os[i] = x; } } @@ -280,7 +280,11 @@ static inline void precompr2(uint32_t nBits, uint32_t *n, uint32_t *res) res[i] = res[i] | 1U << j; for (uint32_t i0 = 0U; i0 < 8192U - nBits; i0++) { - Hacl_Bignum4096_32_add_mod(n, res, res, res); + uint32_t a_copy[128U] = { 0U }; + uint32_t b_copy[128U] = { 0U }; + memcpy(a_copy, res, 128U * sizeof (uint32_t)); + memcpy(b_copy, res, 128U * sizeof (uint32_t)); + Hacl_Bignum4096_32_add_mod(n, a_copy, b_copy, res); } } @@ -309,8 +313,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 128U + i0; uint32_t res_j = c[128U + i0]; + uint32_t *resb = c + 128U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + 128U, 128U * sizeof (uint32_t)); @@ -340,8 +344,8 @@ static inline void reduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t * uint32_t c2 = c00 - c10; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint32_t *os = res; os[i] = x; } } @@ -385,8 +389,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t } uint32_t r = c1; uint32_t c10 = r; - uint32_t *resb = c + 128U + i0; uint32_t res_j = c[128U + i0]; + uint32_t *resb = c + 128U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u32(c0, c10, res_j, resb); } memcpy(res, c + 128U, 128U * sizeof (uint32_t)); @@ -397,8 +401,8 @@ static inline void areduction(uint32_t *n, uint32_t nInv, uint32_t *c, uint32_t uint32_t m = 0U - c00; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = res; uint32_t x = (m & tmp[i]) | (~m & res[i]); + uint32_t *os = res; os[i] = x; } } @@ -451,7 +455,7 @@ bool Hacl_Bignum4096_32_mod(uint32_t *n, uint32_t *a, uint32_t *res) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & 
acc) | (~beq & blt); } uint32_t m1 = acc; uint32_t is_valid_m = m0 & m1; @@ -482,7 +486,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m10 = acc0; uint32_t m00 = m0 & m10; @@ -509,7 +513,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t res = acc; m1 = res; @@ -523,7 +527,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t m = m1 & m2; @@ -549,9 +553,10 @@ exp_vartime_precomp( uint32_t ctx[256U] = { 0U }; memcpy(ctx, n, 128U * sizeof (uint32_t)); memcpy(ctx + 128U, r2, 128U * sizeof (uint32_t)); - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 128U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -560,11 +565,17 @@ exp_vartime_precomp( uint32_t bit = tmp >> j & 1U; if (!(bit == 0U)) { - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, resM, aM, resM); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_mul(ctx_n, mu, aM_copy, aM, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } - uint32_t *ctx_n0 = ctx; - amont_sqr(ctx_n0, mu, aM, aM); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U 
* sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy, aM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); return; @@ -591,18 +602,26 @@ exp_vartime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 128U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 128U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 128U; + uint32_t aM_copy0[128U] = { 0U }; + memcpy(aM_copy0, t11, 128U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 128U, tmp, 128U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 128U; + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 128U, tmp, 128U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -617,6 +636,7 @@ exp_vartime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 128U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[128U] = { 0U }; for (uint32_t i = 0U; i < bBits / 4U; i++) @@ -625,15 +645,22 @@ exp_vartime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * 128U; memcpy(tmp0, (uint32_t *)a_bits_l, 128U * sizeof (uint32_t)); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - 
amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -658,9 +685,10 @@ exp_consttime_precomp( memcpy(ctx, n, 128U * sizeof (uint32_t)); memcpy(ctx + 128U, r2, 128U * sizeof (uint32_t)); uint32_t sw = 0U; - uint32_t *ctx_n = ctx; + uint32_t *ctx_n0 = ctx; uint32_t *ctx_r2 = ctx + 128U; - from(ctx_n, mu, ctx_r2, resM); + from(ctx_n0, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -674,10 +702,16 @@ exp_consttime_precomp( resM[i] = resM[i] ^ dummy; aM[i] = aM[i] ^ dummy; } - uint32_t *ctx_n0 = ctx; - amont_mul(ctx_n0, mu, aM, resM, aM); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, resM, resM); + amont_mul(ctx_n1, mu, aM_copy, resM, aM); + KRML_MAYBE_UNUSED_VAR(ctx); + uint32_t aM_copy0[128U] = { 0U }; + memcpy(aM_copy0, resM, 128U * sizeof (uint32_t)); + uint32_t *ctx_n = ctx; + amont_sqr(ctx_n, mu, aM_copy0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); sw = bit; } uint32_t sw0 = sw; @@ -712,18 +746,26 @@ exp_consttime_precomp( uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + 128U; from(ctx_n0, mu, ctx_r20, t0); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(t1, aM, 128U * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * 128U; + uint32_t aM_copy0[128U] = { 0U }; + memcpy(aM_copy0, t11, 128U * sizeof (uint32_t)); uint32_t *ctx_n1 = ctx; - amont_sqr(ctx_n1, mu, t11, tmp); + amont_sqr(ctx_n1, mu, aM_copy0, tmp); + KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 2U) * 128U, tmp, 128U * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * 128U; + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, aM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, aM, t2, tmp); + amont_mul(ctx_n, mu, aM_copy, t2, tmp); + 
KRML_MAYBE_UNUSED_VAR(ctx); memcpy(table + (2U * i + 3U) * 128U, tmp, 128U * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -738,8 +780,8 @@ exp_consttime_precomp( const uint32_t *res_j = table + (i1 + 1U) * 128U; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x; }); } @@ -748,6 +790,7 @@ exp_consttime_precomp( uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + 128U; from(ctx_n, mu, ctx_r2, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } uint32_t tmp0[128U] = { 0U }; for (uint32_t i0 = 0U; i0 < bBits / 4U; i0++) @@ -756,10 +799,14 @@ exp_consttime_precomp( 0U, 4U, 1U, + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_sqr(ctx_n, mu, resM, resM);); + amont_sqr(ctx_n, mu, aM_copy, resM); + KRML_MAYBE_UNUSED_VAR(ctx);); uint32_t k = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)table, 128U * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -769,12 +816,15 @@ exp_consttime_precomp( const uint32_t *res_j = table + (i1 + 1U) * 128U; for (uint32_t i = 0U; i < 128U; i++) { - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x; }); + uint32_t aM_copy[128U] = { 0U }; + memcpy(aM_copy, resM, 128U * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - amont_mul(ctx_n, mu, resM, tmp0, resM); + amont_mul(ctx_n, mu, aM_copy, tmp0, resM); + KRML_MAYBE_UNUSED_VAR(ctx); } from(n, mu, resM, res); } @@ -922,7 +972,7 @@ bool Hacl_Bignum4096_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m1 = acc0; uint32_t m00 = m0 & m1; @@ -941,7 +991,7 @@ bool 
Hacl_Bignum4096_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t is_valid_m = (m00 & ~m10) & m2; @@ -1034,9 +1084,9 @@ Deallocate the memory previously allocated by Hacl_Bignum4096_mont_ctx_init. */ void Hacl_Bignum4096_32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -1056,8 +1106,10 @@ Hacl_Bignum4096_32_mod_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - bn_slow_precomp(k1.n, k1.mu, k1.r2, a, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + bn_slow_precomp(n, mu, r2, a, res); } /** @@ -1088,8 +1140,10 @@ Hacl_Bignum4096_32_mod_exp_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_vartime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1120,8 +1174,10 @@ Hacl_Bignum4096_32_mod_exp_consttime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - exp_consttime_precomp(k1.n, k1.mu, k1.r2, a, bBits, b, res); + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; + exp_consttime_precomp(n, mu, r2, a, bBits, b, res); } /** @@ -1143,10 +1199,12 @@ Hacl_Bignum4096_32_mod_inv_prime_vartime_precomp( uint32_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; + uint32_t *n = (*k).n; + uint32_t mu = (*k).mu; + uint32_t *r2 = (*k).r2; 
uint32_t n2[128U] = { 0U }; - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); - uint32_t *a1 = k1.n + 1U; + uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, n[0U], 2U, n2); + uint32_t *a1 = n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; for (uint32_t i = 0U; i < 31U; i++) @@ -1174,7 +1232,7 @@ Hacl_Bignum4096_32_mod_inv_prime_vartime_precomp( uint32_t c1 = c; uint32_t c2 = c1; KRML_MAYBE_UNUSED_VAR(c2); - exp_vartime_precomp(k1.n, k1.mu, k1.r2, a, 4096U, n2, res); + exp_vartime_precomp(n, mu, r2, a, 4096U, n2, res); } @@ -1216,9 +1274,9 @@ uint32_t *Hacl_Bignum4096_32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint32_t *os = res2; uint32_t u = load32_be(tmp + (bnLen - i - 1U) * 4U); uint32_t x = u; + uint32_t *os = res2; os[i] = x; } return res2; @@ -1257,11 +1315,11 @@ uint32_t *Hacl_Bignum4096_32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 4U + 1U; i++) { - uint32_t *os = res2; uint8_t *bj = tmp + i * 4U; uint32_t u = load32_le(bj); uint32_t r1 = u; uint32_t x = r1; + uint32_t *os = res2; os[i] = x; } return res2; @@ -1317,7 +1375,7 @@ uint32_t Hacl_Bignum4096_32_lt_mask(uint32_t *a, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(a[i], b[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } return acc; } diff --git a/src/Hacl_Bignum64.c b/src/Hacl_Bignum64.c index f8f5bb6f..2ee38f17 100644 --- a/src/Hacl_Bignum64.c +++ b/src/Hacl_Bignum64.c @@ -78,7 +78,15 @@ Write `(a + b) mod n` in `res`. 
*/ void Hacl_Bignum64_add_mod(uint32_t len, uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) { - Hacl_Bignum_bn_add_mod_n_u64(len, n, a, b, res); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t a_copy[len]; + memset(a_copy, 0U, len * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len); + uint64_t b_copy[len]; + memset(b_copy, 0U, len * sizeof (uint64_t)); + memcpy(a_copy, a, len * sizeof (uint64_t)); + memcpy(b_copy, b, len * sizeof (uint64_t)); + Hacl_Bignum_bn_add_mod_n_u64(len, n, a_copy, b_copy, res); } /** @@ -170,7 +178,7 @@ bool Hacl_Bignum64_mod(uint32_t len, uint64_t *n, uint64_t *a, uint64_t *res) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; uint64_t is_valid_m = m0 & m1; @@ -307,7 +315,7 @@ bool Hacl_Bignum64_mod_inv_prime_vartime(uint32_t len, uint64_t *n, uint64_t *a, { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m1 = acc0; uint64_t m00 = m0 & m1; @@ -328,7 +336,7 @@ bool Hacl_Bignum64_mod_inv_prime_vartime(uint32_t len, uint64_t *n, uint64_t *a, { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t is_valid_m = (m00 & ~m10) & m2; @@ -432,9 +440,9 @@ Deallocate the memory previously allocated by Hacl_Bignum64_mont_ctx_init. 
*/ void Hacl_Bignum64_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -454,10 +462,11 @@ Hacl_Bignum64_mod_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - bn_slow_precomp(len1, k1.n, k1.mu, k1.r2, a, res); + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + bn_slow_precomp(len1, n, mu, r2, a, res); } /** @@ -488,17 +497,11 @@ Hacl_Bignum64_mod_exp_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len1, n, mu, r2, a, bBits, b, res); } /** @@ -529,17 +532,11 @@ Hacl_Bignum64_mod_exp_consttime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(len1, - k1.n, - k1.mu, - k1.r2, - a, - bBits, - b, - res); + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; + Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(len1, n, mu, r2, a, bBits, b, res); } /** @@ -561,17 +558,18 @@ Hacl_Bignum64_mod_inv_prime_vartime_precomp( uint64_t *res ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k10 = *k; - 
uint32_t len1 = k10.len; - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; + uint32_t len1 = (*k).len; + uint64_t *n = (*k).n; + uint64_t mu = (*k).mu; + uint64_t *r2 = (*k).r2; KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint64_t)); - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, n[0U], 2ULL, n2); uint64_t c1; if (1U < len1) { - uint64_t *a1 = k1.n + 1U; + uint64_t *a1 = n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -604,9 +602,9 @@ Hacl_Bignum64_mod_inv_prime_vartime_precomp( } KRML_MAYBE_UNUSED_VAR(c1); Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(len1, - k1.n, - k1.mu, - k1.r2, + n, + mu, + r2, a, 64U * len1, n2, @@ -652,9 +650,9 @@ uint64_t *Hacl_Bignum64_new_bn_from_bytes_be(uint32_t len, uint8_t *b) memcpy(tmp + tmpLen - len, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < bnLen; i++) { - uint64_t *os = res2; uint64_t u = load64_be(tmp + (bnLen - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res2; os[i] = x; } return res2; @@ -693,11 +691,11 @@ uint64_t *Hacl_Bignum64_new_bn_from_bytes_le(uint32_t len, uint8_t *b) memcpy(tmp, b, len * sizeof (uint8_t)); for (uint32_t i = 0U; i < (len - 1U) / 8U + 1U; i++) { - uint64_t *os = res2; uint8_t *bj = tmp + i * 8U; uint64_t u = load64_le(bj); uint64_t r1 = u; uint64_t x = r1; + uint64_t *os = res2; os[i] = x; } return res2; @@ -761,7 +759,7 @@ uint64_t Hacl_Bignum64_lt_mask(uint32_t len, uint64_t *a, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } return acc; } diff --git a/src/Hacl_Chacha20.c b/src/Hacl_Chacha20.c index 38a5c373..cc5b5fb4 100644 --- a/src/Hacl_Chacha20.c +++ b/src/Hacl_Chacha20.c @@ -102,45 
+102,43 @@ static inline void chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) 0U, 16U, 1U, - uint32_t *os = k; uint32_t x = k[i] + ctx[i]; + uint32_t *os = k; os[i] = x;); k[12U] = k[12U] + ctr_u32; } -static const -uint32_t -chacha20_constants[4U] = { 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U }; - void Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) { KRML_MAYBE_FOR4(i, 0U, 4U, 1U, + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; uint32_t *os = ctx; - uint32_t x = chacha20_constants[i]; os[i] = x;); + uint32_t *uu____0 = ctx + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx[12U] = ctr; + uint32_t *uu____1 = ctx + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); } @@ -153,18 +151,18 @@ static void chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, u 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = text + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(out + i * 4U, bl[i]);); } @@ -174,7 +172,9 @@ chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, { uint8_t plain[64U] = { 0U }; memcpy(plain, text, len * sizeof (uint8_t)); - chacha20_encrypt_block(ctx, plain, incr, plain); + uint8_t plain_copy[64U] = { 0U }; + memcpy(plain_copy, plain, 64U * sizeof (uint8_t)); + chacha20_encrypt_block(ctx, plain, incr, plain_copy); memcpy(out, plain, len * sizeof (uint8_t)); } diff --git a/src/Hacl_Chacha20_Vec128.c b/src/Hacl_Chacha20_Vec128.c index deab1dfc..1c49e409 100644 --- 
a/src/Hacl_Chacha20_Vec128.c +++ b/src/Hacl_Chacha20_Vec128.c @@ -153,8 +153,8 @@ chacha20_core_128( 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = k; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); + Lib_IntVector_Intrinsics_vec128 *os = k; os[i] = x;); k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); } @@ -167,37 +167,39 @@ chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, 0U, 4U, 1U, - uint32_t *os = ctx1; uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + uint32_t *os = ctx1; os[i] = x;); + uint32_t *uu____0 = ctx1 + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx1 + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx1[12U] = ctr; + uint32_t *uu____1 = ctx1 + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx1 + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = ctx; uint32_t x = ctx1[i]; Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); + Lib_IntVector_Intrinsics_vec128 *os = ctx; os[i] = x0;); Lib_IntVector_Intrinsics_vec128 ctr1 = Lib_IntVector_Intrinsics_vec128_load32s(0U, 1U, 2U, 3U); Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U]; diff --git a/src/Hacl_Chacha20_Vec256.c b/src/Hacl_Chacha20_Vec256.c index e61a7cfe..83195c90 100644 --- a/src/Hacl_Chacha20_Vec256.c +++ b/src/Hacl_Chacha20_Vec256.c @@ -153,8 +153,8 @@ chacha20_core_256( 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = k; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); + Lib_IntVector_Intrinsics_vec256 *os = k; os[i] = x;); k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); } @@ -167,37 +167,39 @@ chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t 
*n, 0U, 4U, 1U, - uint32_t *os = ctx1; uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + uint32_t *os = ctx1; os[i] = x;); + uint32_t *uu____0 = ctx1 + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx1 + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx1[12U] = ctr; + uint32_t *uu____1 = ctx1 + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx1 + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = ctx; uint32_t x = ctx1[i]; Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); + Lib_IntVector_Intrinsics_vec256 *os = ctx; os[i] = x0;); Lib_IntVector_Intrinsics_vec256 ctr1 = Lib_IntVector_Intrinsics_vec256_load32s(0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U); diff --git a/src/Hacl_Chacha20_Vec32.c b/src/Hacl_Chacha20_Vec32.c index 0dce915c..63f1e951 100644 --- a/src/Hacl_Chacha20_Vec32.c +++ b/src/Hacl_Chacha20_Vec32.c @@ -147,8 +147,8 @@ static inline void chacha20_core_32(uint32_t *k, uint32_t *ctx, uint32_t ctr) 0U, 16U, 1U, - uint32_t *os = k; uint32_t x = k[i] + ctx[i]; + uint32_t *os = k; os[i] = x;); k[12U] = k[12U] + cv; } @@ -160,36 +160,38 @@ static inline void chacha20_init_32(uint32_t *ctx, uint8_t *k, uint8_t *n, uint3 0U, 4U, 1U, - uint32_t *os = ctx1; uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + uint32_t *os = ctx1; os[i] = x;); + uint32_t *uu____0 = ctx1 + 4U; KRML_MAYBE_FOR8(i, 0U, 8U, 1U, - uint32_t *os = ctx1 + 4U; uint8_t *bj = k + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); ctx1[12U] = ctr; + uint32_t *uu____1 = ctx1 + 13U; KRML_MAYBE_FOR3(i, 0U, 3U, 1U, - uint32_t *os = ctx1 + 13U; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = 
x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = ctx; uint32_t x = ctx1[i]; + uint32_t *os = ctx; os[i] = x;); uint32_t ctr1 = 0U; uint32_t c12 = ctx[12U]; diff --git a/src/Hacl_Curve25519_51.c b/src/Hacl_Curve25519_51.c index ca561e89..2d1b7c76 100644 --- a/src/Hacl_Curve25519_51.c +++ b/src/Hacl_Curve25519_51.c @@ -38,64 +38,87 @@ static void point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_ uint64_t *x1 = q; uint64_t *x2 = nq; uint64_t *z2 = nq + 5U; - uint64_t *z3 = nq_p1 + 5U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 5U; - uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 10U; + uint64_t *ab = tmp1; + uint64_t *a = ab; + uint64_t *b = ab + 5U; Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + uint64_t *ab1 = tmp1; uint64_t *x3 = nq_p1; uint64_t *z31 = nq_p1 + 5U; uint64_t *d0 = dc; uint64_t *c0 = dc + 5U; Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); - Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); - Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); - Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); - uint64_t *a1 = tmp1; - uint64_t *b1 = tmp1 + 5U; - uint64_t *d = tmp1 + 10U; - uint64_t *c = tmp1 + 15U; - uint64_t *ab1 = tmp1; + uint64_t f1_copy0[10U] = { 0U }; + memcpy(f1_copy0, dc, 10U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul2(dc, f1_copy0, ab1, tmp2); + uint64_t *d1 = dc; + uint64_t *c1 = dc + 5U; + Hacl_Impl_Curve25519_Field51_fadd(x3, d1, c1); + Hacl_Impl_Curve25519_Field51_fsub(z31, d1, c1); + uint64_t *ab2 = tmp1; uint64_t *dc1 = tmp1 + 10U; - Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); - Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); + Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab2, tmp2); + uint64_t f1_copy1[10U] = { 0U }; + memcpy(f1_copy1, nq_p1, 10U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, f1_copy1, tmp2); + uint64_t *a1 = ab2; + uint64_t *b1 = ab2 + 5U; + uint64_t *d = dc1; 
+ uint64_t *c = dc1 + 5U; a1[0U] = c[0U]; a1[1U] = c[1U]; a1[2U] = c[2U]; a1[3U] = c[3U]; a1[4U] = c[4U]; - Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, c, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsub(c, d, f2_copy); Hacl_Impl_Curve25519_Field51_fmul1(b1, c, 121665ULL); - Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); - Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); - Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); + uint64_t f1_copy2[5U] = { 0U }; + memcpy(f1_copy2, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fadd(b1, f1_copy2, d); + uint64_t *ab3 = tmp1; + uint64_t *dc2 = tmp1 + 10U; + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc2, ab3, tmp2); + uint64_t *z310 = nq_p1 + 5U; + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, z310, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(z310, f1_copy, x1, tmp2); } static void point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) { uint64_t *x2 = nq; uint64_t *z2 = nq + 5U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 5U; - uint64_t *d = tmp1 + 10U; - uint64_t *c = tmp1 + 15U; uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 10U; + uint64_t *a = ab; + uint64_t *b = ab + 5U; Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); - a[0U] = c[0U]; - a[1U] = c[1U]; - a[2U] = c[2U]; - a[3U] = c[3U]; - a[4U] = c[4U]; - Hacl_Impl_Curve25519_Field51_fsub(c, d, c); - Hacl_Impl_Curve25519_Field51_fmul1(b, c, 121665ULL); - Hacl_Impl_Curve25519_Field51_fadd(b, b, d); - Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); + uint64_t *d = dc; + uint64_t *c = dc + 5U; + uint64_t *a1 = ab; + uint64_t *b1 = ab + 5U; + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + a1[4U] = c[4U]; + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, c, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsub(c, d, f2_copy); + 
Hacl_Impl_Curve25519_Field51_fmul1(b1, c, 121665ULL); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fadd(b1, f1_copy, d); + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + 10U; + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); } static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) @@ -104,7 +127,6 @@ static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) for (uint32_t _i = 0U; _i < 10U; ++_i) tmp2[_i] = FStar_UInt128_uint64_to_uint128(0ULL); uint64_t p01_tmp1_swap[41U] = { 0U }; - uint64_t *p0 = p01_tmp1_swap; uint64_t *p01 = p01_tmp1_swap; uint64_t *p03 = p01; uint64_t *p11 = p01 + 10U; @@ -121,34 +143,39 @@ static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) z0[2U] = 0ULL; z0[3U] = 0ULL; z0[4U] = 0ULL; + uint64_t *swap = p01_tmp1_swap + 40U; uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *nq0 = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + 10U; + Hacl_Impl_Curve25519_Field51_cswap2(1ULL, nq0, nq_p1); uint64_t *p01_tmp11 = p01_tmp1_swap; - uint64_t *nq1 = p01_tmp1_swap; - uint64_t *nq_p11 = p01_tmp1_swap + 10U; - uint64_t *swap = p01_tmp1_swap + 40U; - Hacl_Impl_Curve25519_Field51_cswap2(1ULL, nq1, nq_p11); point_add_and_double(init, p01_tmp11, tmp2); swap[0U] = 1ULL; for (uint32_t i = 0U; i < 251U; i++) { uint64_t *p01_tmp12 = p01_tmp1_swap; uint64_t *swap1 = p01_tmp1_swap + 40U; - uint64_t *nq2 = p01_tmp12; - uint64_t *nq_p12 = p01_tmp12 + 10U; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 10U; uint64_t bit = (uint64_t)((uint32_t)key[(253U - i) / 8U] >> (253U - i) % 8U & 1U); uint64_t sw = swap1[0U] ^ bit; - Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12); + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); point_add_and_double(init, p01_tmp12, tmp2); swap1[0U] = bit; } uint64_t sw = swap[0U]; + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 10U; 
Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); - uint64_t *nq10 = p01_tmp1; - uint64_t *tmp1 = p01_tmp1 + 20U; - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - memcpy(out, p0, 10U * sizeof (uint64_t)); + uint64_t *p01_tmp10 = p01_tmp1_swap; + uint64_t *nq = p01_tmp10; + uint64_t *tmp1 = p01_tmp10 + 20U; + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + uint64_t *p010 = p01_tmp1_swap; + memcpy(out, p010, 10U * sizeof (uint64_t)); } void @@ -162,7 +189,9 @@ Hacl_Curve25519_51_fsquare_times( Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); for (uint32_t i = 0U; i < n - 1U; i++) { - Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, o, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fsqr(o, f1_copy, tmp); } } @@ -176,32 +205,59 @@ void Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tm Hacl_Curve25519_51_fsquare_times(a1, i, tmp10, 1U); Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, 2U); Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); - Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); - Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, 1U); - Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); - Hacl_Curve25519_51_fsquare_times(t010, b1, tmp10, 5U); - Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, a1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(a1, b1, f2_copy, tmp); + FStar_UInt128_uint128 *tmp11 = tmp; + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp11, 1U); + uint64_t f2_copy0[5U] = { 0U }; + memcpy(f2_copy0, b1, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, f2_copy0, tmp); + FStar_UInt128_uint128 *tmp12 = tmp; + Hacl_Curve25519_51_fsquare_times(t010, b1, tmp12, 5U); + uint64_t f2_copy1[5U] = { 0U }; + memcpy(f2_copy1, b1, 5U * sizeof (uint64_t)); + 
Hacl_Impl_Curve25519_Field51_fmul(b1, t010, f2_copy1, tmp); uint64_t *b10 = t1 + 5U; uint64_t *c10 = t1 + 10U; uint64_t *t011 = t1 + 15U; - FStar_UInt128_uint128 *tmp11 = tmp; - Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, 10U); + FStar_UInt128_uint128 *tmp13 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp13, 10U); Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); - Hacl_Curve25519_51_fsquare_times(t011, c10, tmp11, 20U); - Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); - Hacl_Curve25519_51_fsquare_times(t011, t011, tmp11, 10U); - Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); - Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, 50U); + FStar_UInt128_uint128 *tmp110 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, c10, tmp110, 20U); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, t011, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(t011, f1_copy, c10, tmp); + FStar_UInt128_uint128 *tmp120 = tmp; + uint64_t i_copy0[5U] = { 0U }; + memcpy(i_copy0, t011, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(t011, i_copy0, tmp120, 10U); + uint64_t f2_copy2[5U] = { 0U }; + memcpy(f2_copy2, b10, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(b10, t011, f2_copy2, tmp); + FStar_UInt128_uint128 *tmp130 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp130, 50U); Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); uint64_t *b11 = t1 + 5U; uint64_t *c1 = t1 + 10U; uint64_t *t01 = t1 + 15U; FStar_UInt128_uint128 *tmp1 = tmp; Hacl_Curve25519_51_fsquare_times(t01, c1, tmp1, 100U); - Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); - Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, 50U); - Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); - Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, 5U); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, t01, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(t01, f1_copy0, c1, tmp); + FStar_UInt128_uint128 *tmp111 = tmp; + uint64_t 
i_copy1[5U] = { 0U }; + memcpy(i_copy1, t01, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(t01, i_copy1, tmp111, 50U); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, t01, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(t01, f1_copy1, b11, tmp); + FStar_UInt128_uint128 *tmp121 = tmp; + uint64_t i_copy[5U] = { 0U }; + memcpy(i_copy, t01, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(t01, i_copy, tmp121, 5U); uint64_t *a = t1; uint64_t *t0 = t1 + 15U; Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); @@ -217,7 +273,9 @@ static void encode_point(uint8_t *o, uint64_t *i) for (uint32_t _i = 0U; _i < 10U; ++_i) tmp_w[_i] = FStar_UInt128_uint64_to_uint128(0ULL); Hacl_Curve25519_51_finv(tmp, z, tmp_w); - Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, tmp, 5U * sizeof (uint64_t)); + Hacl_Impl_Curve25519_Field51_fmul(tmp, f1_copy, x, tmp_w); Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, store64_le(o + i0 * 8U, u64s[i0]);); } @@ -232,16 +290,17 @@ Compute the scalar multiple of a point. 
void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) { uint64_t init[10U] = { 0U }; + uint64_t init_copy[10U] = { 0U }; uint64_t tmp[4U] = { 0U }; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = tmp; uint8_t *bj = pub + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = tmp; os[i] = x;); uint64_t tmp3 = tmp[3U]; tmp[3U] = tmp3 & 0x7fffffffffffffffULL; @@ -265,7 +324,8 @@ void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) x[2U] = f1h | f2l; x[3U] = f2h | f3l; x[4U] = f3h; - montgomery_ladder(init, priv, init); + memcpy(init_copy, init, 10U * sizeof (uint64_t)); + montgomery_ladder(init, priv, init_copy); encode_point(out, init); } @@ -282,8 +342,8 @@ void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv) uint8_t basepoint[32U] = { 0U }; for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = basepoint; uint8_t x = g25519[i]; + uint8_t *os = basepoint; os[i] = x; } Hacl_Curve25519_51_scalarmult(pub, priv, basepoint); diff --git a/src/Hacl_Curve25519_64.c b/src/Hacl_Curve25519_64.c index edcab306..0a0dd778 100644 --- a/src/Hacl_Curve25519_64.c +++ b/src/Hacl_Curve25519_64.c @@ -121,69 +121,91 @@ static void point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2 uint64_t *x1 = q; uint64_t *x2 = nq; uint64_t *z2 = nq + 4U; - uint64_t *z3 = nq_p1 + 4U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 4U; - uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 8U; + uint64_t *ab = tmp1; + uint64_t *a = ab; + uint64_t *b = ab + 4U; fadd0(a, x2, z2); fsub0(b, x2, z2); + uint64_t *ab1 = tmp1; uint64_t *x3 = nq_p1; uint64_t *z31 = nq_p1 + 4U; uint64_t *d0 = dc; uint64_t *c0 = dc + 4U; fadd0(c0, x3, z31); fsub0(d0, x3, z31); - fmul20(dc, dc, ab, tmp2); - fadd0(x3, d0, c0); - fsub0(z31, d0, c0); - uint64_t *a1 = tmp1; - uint64_t *b1 = tmp1 + 4U; - uint64_t *d = tmp1 + 8U; - uint64_t *c = tmp1 + 12U; - uint64_t *ab1 = tmp1; + uint64_t f1_copy0[8U] = { 0U }; + memcpy(f1_copy0, dc, 8U 
* sizeof (uint64_t)); + fmul20(dc, f1_copy0, ab1, tmp2); + uint64_t *d1 = dc; + uint64_t *c1 = dc + 4U; + fadd0(x3, d1, c1); + fsub0(z31, d1, c1); + uint64_t *ab2 = tmp1; uint64_t *dc1 = tmp1 + 8U; - fsqr20(dc1, ab1, tmp2); - fsqr20(nq_p1, nq_p1, tmp2); + fsqr20(dc1, ab2, tmp2); + uint64_t f1_copy1[8U] = { 0U }; + memcpy(f1_copy1, nq_p1, 8U * sizeof (uint64_t)); + fsqr20(nq_p1, f1_copy1, tmp2); + uint64_t *a1 = ab2; + uint64_t *b1 = ab2 + 4U; + uint64_t *d = dc1; + uint64_t *c = dc1 + 4U; a1[0U] = c[0U]; a1[1U] = c[1U]; a1[2U] = c[2U]; a1[3U] = c[3U]; - fsub0(c, d, c); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, c, 4U * sizeof (uint64_t)); + fsub0(c, d, f2_copy); fmul_scalar0(b1, c, 121665ULL); - fadd0(b1, b1, d); - fmul20(nq, dc1, ab1, tmp2); - fmul0(z3, z3, x1, tmp2); + uint64_t f1_copy2[4U] = { 0U }; + memcpy(f1_copy2, b1, 4U * sizeof (uint64_t)); + fadd0(b1, f1_copy2, d); + uint64_t *ab3 = tmp1; + uint64_t *dc2 = tmp1 + 8U; + fmul20(nq, dc2, ab3, tmp2); + uint64_t *z310 = nq_p1 + 4U; + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, z310, 4U * sizeof (uint64_t)); + fmul0(z310, f1_copy, x1, tmp2); } static void point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2) { uint64_t *x2 = nq; uint64_t *z2 = nq + 4U; - uint64_t *a = tmp1; - uint64_t *b = tmp1 + 4U; - uint64_t *d = tmp1 + 8U; - uint64_t *c = tmp1 + 12U; uint64_t *ab = tmp1; uint64_t *dc = tmp1 + 8U; + uint64_t *a = ab; + uint64_t *b = ab + 4U; fadd0(a, x2, z2); fsub0(b, x2, z2); fsqr20(dc, ab, tmp2); - a[0U] = c[0U]; - a[1U] = c[1U]; - a[2U] = c[2U]; - a[3U] = c[3U]; - fsub0(c, d, c); - fmul_scalar0(b, c, 121665ULL); - fadd0(b, b, d); - fmul20(nq, dc, ab, tmp2); + uint64_t *d = dc; + uint64_t *c = dc + 4U; + uint64_t *a1 = ab; + uint64_t *b1 = ab + 4U; + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, c, 4U * sizeof (uint64_t)); + fsub0(c, d, f2_copy); + fmul_scalar0(b1, c, 121665ULL); + uint64_t f1_copy[4U] = { 0U }; + 
memcpy(f1_copy, b1, 4U * sizeof (uint64_t)); + fadd0(b1, f1_copy, d); + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + 8U; + fmul20(nq, dc1, ab1, tmp2); } static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) { uint64_t tmp2[16U] = { 0U }; uint64_t p01_tmp1_swap[33U] = { 0U }; - uint64_t *p0 = p01_tmp1_swap; uint64_t *p01 = p01_tmp1_swap; uint64_t *p03 = p01; uint64_t *p11 = p01 + 8U; @@ -198,34 +220,39 @@ static void montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) z0[1U] = 0ULL; z0[2U] = 0ULL; z0[3U] = 0ULL; + uint64_t *swap = p01_tmp1_swap + 32U; uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *nq0 = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + 8U; + cswap20(1ULL, nq0, nq_p1); uint64_t *p01_tmp11 = p01_tmp1_swap; - uint64_t *nq1 = p01_tmp1_swap; - uint64_t *nq_p11 = p01_tmp1_swap + 8U; - uint64_t *swap = p01_tmp1_swap + 32U; - cswap20(1ULL, nq1, nq_p11); point_add_and_double(init, p01_tmp11, tmp2); swap[0U] = 1ULL; for (uint32_t i = 0U; i < 251U; i++) { uint64_t *p01_tmp12 = p01_tmp1_swap; uint64_t *swap1 = p01_tmp1_swap + 32U; - uint64_t *nq2 = p01_tmp12; - uint64_t *nq_p12 = p01_tmp12 + 8U; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 8U; uint64_t bit = (uint64_t)((uint32_t)key[(253U - i) / 8U] >> (253U - i) % 8U & 1U); uint64_t sw = swap1[0U] ^ bit; - cswap20(sw, nq2, nq_p12); + cswap20(sw, nq1, nq_p11); point_add_and_double(init, p01_tmp12, tmp2); swap1[0U] = bit; } uint64_t sw = swap[0U]; + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp12; + uint64_t *nq_p11 = p01_tmp12 + 8U; cswap20(sw, nq1, nq_p11); - uint64_t *nq10 = p01_tmp1; - uint64_t *tmp1 = p01_tmp1 + 16U; - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - point_double(nq10, tmp1, tmp2); - memcpy(out, p0, 8U * sizeof (uint64_t)); + uint64_t *p01_tmp10 = p01_tmp1_swap; + uint64_t *nq = p01_tmp10; + uint64_t *tmp1 = p01_tmp10 + 16U; + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, tmp2); + point_double(nq, tmp1, 
tmp2); + uint64_t *p010 = p01_tmp1_swap; + memcpy(out, p010, 8U * sizeof (uint64_t)); } static void fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) @@ -233,7 +260,9 @@ static void fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) fsqr0(o, inp, tmp); for (uint32_t i = 0U; i < n - 1U; i++) { - fsqr0(o, o, tmp); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, o, 4U * sizeof (uint64_t)); + fsqr0(o, f1_copy, tmp); } } @@ -247,32 +276,59 @@ static void finv(uint64_t *o, uint64_t *i, uint64_t *tmp) fsquare_times(a1, i, tmp10, 1U); fsquare_times(t010, a1, tmp10, 2U); fmul0(b1, t010, i, tmp); - fmul0(a1, b1, a1, tmp); - fsquare_times(t010, a1, tmp10, 1U); - fmul0(b1, t010, b1, tmp); - fsquare_times(t010, b1, tmp10, 5U); - fmul0(b1, t010, b1, tmp); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, a1, 4U * sizeof (uint64_t)); + fmul0(a1, b1, f2_copy, tmp); + uint64_t *tmp11 = tmp; + fsquare_times(t010, a1, tmp11, 1U); + uint64_t f2_copy0[4U] = { 0U }; + memcpy(f2_copy0, b1, 4U * sizeof (uint64_t)); + fmul0(b1, t010, f2_copy0, tmp); + uint64_t *tmp12 = tmp; + fsquare_times(t010, b1, tmp12, 5U); + uint64_t f2_copy1[4U] = { 0U }; + memcpy(f2_copy1, b1, 4U * sizeof (uint64_t)); + fmul0(b1, t010, f2_copy1, tmp); uint64_t *b10 = t1 + 4U; uint64_t *c10 = t1 + 8U; uint64_t *t011 = t1 + 12U; - uint64_t *tmp11 = tmp; - fsquare_times(t011, b10, tmp11, 10U); + uint64_t *tmp13 = tmp; + fsquare_times(t011, b10, tmp13, 10U); fmul0(c10, t011, b10, tmp); - fsquare_times(t011, c10, tmp11, 20U); - fmul0(t011, t011, c10, tmp); - fsquare_times(t011, t011, tmp11, 10U); - fmul0(b10, t011, b10, tmp); - fsquare_times(t011, b10, tmp11, 50U); + uint64_t *tmp110 = tmp; + fsquare_times(t011, c10, tmp110, 20U); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, t011, 4U * sizeof (uint64_t)); + fmul0(t011, f1_copy, c10, tmp); + uint64_t *tmp120 = tmp; + uint64_t i_copy0[4U] = { 0U }; + memcpy(i_copy0, t011, 4U * sizeof (uint64_t)); + fsquare_times(t011, i_copy0, 
tmp120, 10U); + uint64_t f2_copy2[4U] = { 0U }; + memcpy(f2_copy2, b10, 4U * sizeof (uint64_t)); + fmul0(b10, t011, f2_copy2, tmp); + uint64_t *tmp130 = tmp; + fsquare_times(t011, b10, tmp130, 50U); fmul0(c10, t011, b10, tmp); uint64_t *b11 = t1 + 4U; uint64_t *c1 = t1 + 8U; uint64_t *t01 = t1 + 12U; uint64_t *tmp1 = tmp; fsquare_times(t01, c1, tmp1, 100U); - fmul0(t01, t01, c1, tmp); - fsquare_times(t01, t01, tmp1, 50U); - fmul0(t01, t01, b11, tmp); - fsquare_times(t01, t01, tmp1, 5U); + uint64_t f1_copy0[4U] = { 0U }; + memcpy(f1_copy0, t01, 4U * sizeof (uint64_t)); + fmul0(t01, f1_copy0, c1, tmp); + uint64_t *tmp111 = tmp; + uint64_t i_copy1[4U] = { 0U }; + memcpy(i_copy1, t01, 4U * sizeof (uint64_t)); + fsquare_times(t01, i_copy1, tmp111, 50U); + uint64_t f1_copy1[4U] = { 0U }; + memcpy(f1_copy1, t01, 4U * sizeof (uint64_t)); + fmul0(t01, f1_copy1, b11, tmp); + uint64_t *tmp121 = tmp; + uint64_t i_copy[4U] = { 0U }; + memcpy(i_copy, t01, 4U * sizeof (uint64_t)); + fsquare_times(t01, i_copy, tmp121, 5U); uint64_t *a = t1; uint64_t *t0 = t1 + 12U; fmul0(o, t0, a, tmp); @@ -319,7 +375,9 @@ static void encode_point(uint8_t *o, uint64_t *i) uint64_t u64s[4U] = { 0U }; uint64_t tmp_w[16U] = { 0U }; finv(tmp, z, tmp_w); - fmul0(tmp, tmp, x, tmp_w); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, tmp, 4U * sizeof (uint64_t)); + fmul0(tmp, f1_copy, x, tmp_w); store_felem(u64s, tmp); KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, store64_le(o + i0 * 8U, u64s[i0]);); } @@ -334,16 +392,17 @@ Compute the scalar multiple of a point. 
void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) { uint64_t init[8U] = { 0U }; + uint64_t init_copy[8U] = { 0U }; uint64_t tmp[4U] = { 0U }; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = tmp; uint8_t *bj = pub + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = tmp; os[i] = x;); uint64_t tmp3 = tmp[3U]; tmp[3U] = tmp3 & 0x7fffffffffffffffULL; @@ -357,7 +416,8 @@ void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) x[1U] = tmp[1U]; x[2U] = tmp[2U]; x[3U] = tmp[3U]; - montgomery_ladder(init, priv, init); + memcpy(init_copy, init, 8U * sizeof (uint64_t)); + montgomery_ladder(init, priv, init_copy); encode_point(out, init); } @@ -374,8 +434,8 @@ void Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv) uint8_t basepoint[32U] = { 0U }; for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = basepoint; uint8_t x = g25519[i]; + uint8_t *os = basepoint; os[i] = x; } Hacl_Curve25519_64_scalarmult(pub, priv, basepoint); diff --git a/src/Hacl_EC_K256.c b/src/Hacl_EC_K256.c index 581c223b..d5f6e1a9 100644 --- a/src/Hacl_EC_K256.c +++ b/src/Hacl_EC_K256.c @@ -267,9 +267,9 @@ void Hacl_EC_K256_point_mul(uint8_t *scalar, uint64_t *p, uint64_t *out) 0U, 4U, 1U, - uint64_t *os = scalar_q; uint64_t u = load64_be(scalar + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = scalar_q; os[i] = x;); Hacl_Impl_K256_PointMul_point_mul(out, scalar_q, p); } diff --git a/src/Hacl_Ed25519.c b/src/Hacl_Ed25519.c index d1f8edf2..e347b02e 100644 --- a/src/Hacl_Ed25519.c +++ b/src/Hacl_Ed25519.c @@ -144,7 +144,9 @@ static inline void fsquare_times_inplace(uint64_t *output, uint32_t count) FStar_UInt128_uint128 tmp[5U]; for (uint32_t _i = 0U; _i < 5U; ++_i) tmp[_i] = FStar_UInt128_uint64_to_uint128(0ULL); - Hacl_Curve25519_51_fsquare_times(output, output, tmp, count); + uint64_t input[5U] = { 0U }; + memcpy(input, output, 5U * sizeof (uint64_t)); + Hacl_Curve25519_51_fsquare_times(output, input, 
tmp, count); } void Hacl_Bignum25519_inverse(uint64_t *out, uint64_t *a) @@ -215,11 +217,11 @@ void Hacl_Bignum25519_load_51(uint64_t *output, uint8_t *input) 0U, 4U, 1U, - uint64_t *os = u64s; uint8_t *bj = input + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = u64s; os[i] = x;); uint64_t u64s3 = u64s[3U]; u64s[3U] = u64s3 & 0x7fffffffffffffffULL; @@ -252,7 +254,9 @@ void Hacl_Impl_Ed25519_PointDouble_point_double(uint64_t *out, uint64_t *p) fsum(tmp30, tmp1, tmp20); fdifference(tmp40, tmp1, tmp20); fsquare(tmp1, z1); - times_2(tmp1, tmp1); + uint64_t a_copy[5U] = { 0U }; + memcpy(a_copy, tmp1, 5U * sizeof (uint64_t)); + times_2(tmp1, a_copy); uint64_t *tmp10 = tmp; uint64_t *tmp2 = tmp + 5U; uint64_t *tmp3 = tmp + 10U; @@ -260,12 +264,18 @@ void Hacl_Impl_Ed25519_PointDouble_point_double(uint64_t *out, uint64_t *p) uint64_t *x1 = p; uint64_t *y1 = p + 5U; fsum(tmp2, x1, y1); - fsquare(tmp2, tmp2); + uint64_t a_copy0[5U] = { 0U }; + memcpy(a_copy0, tmp2, 5U * sizeof (uint64_t)); + fsquare(tmp2, a_copy0); Hacl_Bignum25519_reduce_513(tmp3); - fdifference(tmp2, tmp3, tmp2); + uint64_t b_copy[5U] = { 0U }; + memcpy(b_copy, tmp2, 5U * sizeof (uint64_t)); + fdifference(tmp2, tmp3, b_copy); Hacl_Bignum25519_reduce_513(tmp10); Hacl_Bignum25519_reduce_513(tmp4); - fsum(tmp10, tmp10, tmp4); + uint64_t a_copy1[5U] = { 0U }; + memcpy(a_copy1, tmp10, 5U * sizeof (uint64_t)); + fsum(tmp10, a_copy1, tmp4); uint64_t *tmp_f = tmp; uint64_t *tmp_e = tmp + 5U; uint64_t *tmp_h = tmp + 10U; @@ -308,12 +318,18 @@ void Hacl_Impl_Ed25519_PointAdd_point_add(uint64_t *out, uint64_t *p, uint64_t * uint64_t *z2 = q + 10U; uint64_t *t2 = q + 15U; times_2d(tmp10, t1); - fmul0(tmp10, tmp10, t2); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, tmp10, 5U * sizeof (uint64_t)); + fmul0(tmp10, inp_copy, t2); times_2(tmp2, z1); - fmul0(tmp2, tmp2, z2); + uint64_t inp_copy0[5U] = { 0U }; + memcpy(inp_copy0, tmp2, 5U * sizeof (uint64_t)); + fmul0(tmp2, 
inp_copy0, z2); fdifference(tmp5, tmp4, tmp3); fdifference(tmp6, tmp2, tmp10); - fsum(tmp10, tmp2, tmp10); + uint64_t a_copy[5U] = { 0U }; + memcpy(a_copy, tmp10, 5U * sizeof (uint64_t)); + fsum(tmp10, a_copy, tmp2); fsum(tmp2, tmp4, tmp3); uint64_t *tmp_g = tmp; uint64_t *tmp_h = tmp + 5U; @@ -367,17 +383,27 @@ static inline void pow2_252m2(uint64_t *out, uint64_t *z) fsquare_times(a, z, 1U); fsquare_times(t00, a, 2U); fmul0(b0, t00, z); - fmul0(a, b0, a); + uint64_t inp_copy0[5U] = { 0U }; + memcpy(inp_copy0, a, 5U * sizeof (uint64_t)); + fmul0(a, inp_copy0, b0); fsquare_times(t00, a, 1U); - fmul0(b0, t00, b0); + uint64_t inp_copy1[5U] = { 0U }; + memcpy(inp_copy1, b0, 5U * sizeof (uint64_t)); + fmul0(b0, inp_copy1, t00); fsquare_times(t00, b0, 5U); - fmul0(b0, t00, b0); + uint64_t inp_copy2[5U] = { 0U }; + memcpy(inp_copy2, b0, 5U * sizeof (uint64_t)); + fmul0(b0, inp_copy2, t00); fsquare_times(t00, b0, 10U); fmul0(c0, t00, b0); fsquare_times(t00, c0, 20U); - fmul0(t00, t00, c0); + uint64_t inp_copy3[5U] = { 0U }; + memcpy(inp_copy3, t00, 5U * sizeof (uint64_t)); + fmul0(t00, inp_copy3, c0); fsquare_times_inplace(t00, 10U); - fmul0(b0, t00, b0); + uint64_t inp_copy4[5U] = { 0U }; + memcpy(inp_copy4, b0, 5U * sizeof (uint64_t)); + fmul0(b0, inp_copy4, t00); fsquare_times(t00, b0, 50U); uint64_t *a0 = buf; uint64_t *t0 = buf + 5U; @@ -386,9 +412,13 @@ static inline void pow2_252m2(uint64_t *out, uint64_t *z) fsquare_times(a0, z, 1U); fmul0(c, t0, b); fsquare_times(t0, c, 100U); - fmul0(t0, t0, c); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, t0, 5U * sizeof (uint64_t)); + fmul0(t0, inp_copy, c); fsquare_times_inplace(t0, 50U); - fmul0(t0, t0, b); + uint64_t inp_copy5[5U] = { 0U }; + memcpy(inp_copy5, t0, 5U * sizeof (uint64_t)); + fmul0(t0, inp_copy5, b); fsquare_times_inplace(t0, 2U); fmul0(out, t0, a0); } @@ -411,7 +441,9 @@ static inline void mul_modp_sqrt_m1(uint64_t *x) sqrt_m1[2U] = 0x0007ef5e9cbd0c60ULL; sqrt_m1[3U] = 0x00078595a6804c9eULL; 
sqrt_m1[4U] = 0x0002b8324804fc1dULL; - fmul0(x, x, sqrt_m1); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, x, 5U * sizeof (uint64_t)); + fmul0(x, inp_copy, sqrt_m1); } static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) @@ -450,11 +482,15 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) one[4U] = 0ULL; fsquare(y2, y); times_d(dyy, y2); - fsum(dyy, dyy, one); + uint64_t a_copy0[5U] = { 0U }; + memcpy(a_copy0, dyy, 5U * sizeof (uint64_t)); + fsum(dyy, a_copy0, one); Hacl_Bignum25519_reduce_513(dyy); Hacl_Bignum25519_inverse(dyyi, dyy); fdifference(x2, y2, one); - fmul0(x2, x2, dyyi); + uint64_t inp_copy[5U] = { 0U }; + memcpy(inp_copy, x2, 5U * sizeof (uint64_t)); + fmul0(x2, inp_copy, dyyi); reduce(x2); bool x2_is_0 = is_0(x2); uint8_t z; @@ -493,7 +529,9 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) uint64_t *t00 = tmp + 10U; pow2_252m2(x31, x210); fsquare(t00, x31); - fdifference(t00, t00, x210); + uint64_t a_copy1[5U] = { 0U }; + memcpy(a_copy1, t00, 5U * sizeof (uint64_t)); + fdifference(t00, a_copy1, x210); Hacl_Bignum25519_reduce_513(t00); reduce(t00); bool t0_is_0 = is_0(t00); @@ -505,15 +543,13 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) uint64_t *x3 = tmp + 5U; uint64_t *t01 = tmp + 10U; fsquare(t01, x3); - fdifference(t01, t01, x211); + uint64_t a_copy[5U] = { 0U }; + memcpy(a_copy, t01, 5U * sizeof (uint64_t)); + fdifference(t01, a_copy, x211); Hacl_Bignum25519_reduce_513(t01); reduce(t01); bool z1 = is_0(t01); - if (z1 == false) - { - res = false; - } - else + if (z1) { uint64_t *x32 = tmp + 5U; uint64_t *t0 = tmp + 10U; @@ -527,13 +563,19 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) t0[2U] = 0ULL; t0[3U] = 0ULL; t0[4U] = 0ULL; - fdifference(x32, t0, x32); + uint64_t b_copy[5U] = { 0U }; + memcpy(b_copy, x32, 5U * sizeof (uint64_t)); + fdifference(x32, t0, b_copy); Hacl_Bignum25519_reduce_513(x32); reduce(x32); } memcpy(x, 
x32, 5U * sizeof (uint64_t)); res = true; } + else + { + res = false; + } } } bool res0 = res; @@ -551,11 +593,7 @@ bool Hacl_Impl_Ed25519_PointDecompress_point_decompress(uint64_t *out, uint8_t * Hacl_Bignum25519_load_51(y, s); bool z0 = recover_x(x, y, sign); bool res; - if (z0 == false) - { - res = false; - } - else + if (z0) { uint64_t *outx = out; uint64_t *outy = out + 5U; @@ -571,6 +609,10 @@ bool Hacl_Impl_Ed25519_PointDecompress_point_decompress(uint64_t *out, uint8_t * fmul0(outt, x, y); res = true; } + else + { + res = false; + } bool res0 = res; return res0; } @@ -578,20 +620,20 @@ bool Hacl_Impl_Ed25519_PointDecompress_point_decompress(uint64_t *out, uint8_t * void Hacl_Impl_Ed25519_PointCompress_point_compress(uint8_t *z, uint64_t *p) { uint64_t tmp[15U] = { 0U }; + uint64_t *zinv = tmp; uint64_t *x = tmp + 5U; - uint64_t *out = tmp + 10U; - uint64_t *zinv1 = tmp; - uint64_t *x1 = tmp + 5U; - uint64_t *out1 = tmp + 10U; + uint64_t *out0 = tmp + 10U; uint64_t *px = p; uint64_t *py = p + 5U; uint64_t *pz = p + 10U; - Hacl_Bignum25519_inverse(zinv1, pz); - fmul0(x1, px, zinv1); - reduce(x1); - fmul0(out1, py, zinv1); - Hacl_Bignum25519_reduce_513(out1); - uint64_t x0 = x[0U]; + Hacl_Bignum25519_inverse(zinv, pz); + fmul0(x, px, zinv); + reduce(x); + fmul0(out0, py, zinv); + Hacl_Bignum25519_reduce_513(out0); + uint64_t *x1 = tmp + 5U; + uint64_t *out = tmp + 10U; + uint64_t x0 = x1[0U]; uint64_t b = x0 & 1ULL; Hacl_Bignum25519_store_51(z, out); uint8_t xbyte = (uint8_t)b; @@ -1150,11 +1192,7 @@ static inline bool gte_q(uint64_t *s) { return false; } - if (s3 > 0x00000000000000ULL) - { - return true; - } - if (s2 > 0x000000000014deULL) + if (s3 > 0x00000000000000ULL || s2 > 0x000000000014deULL) { return true; } @@ -1170,11 +1208,7 @@ static inline bool gte_q(uint64_t *s) { return false; } - if (s0 >= 0x12631a5cf5d3edULL) - { - return true; - } - return false; + return s0 >= 0x12631a5cf5d3edULL; } static inline bool eq(uint64_t *a, uint64_t *b) @@ -1246,11 
+1280,11 @@ void Hacl_Impl_Ed25519_Ladder_point_mul(uint64_t *out, uint8_t *scalar, uint64_t 0U, 4U, 1U, - uint64_t *os = bscalar; uint8_t *bj = scalar + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar; os[i] = x;); uint64_t table[320U] = { 0U }; uint64_t tmp[20U] = { 0U }; @@ -1258,23 +1292,35 @@ void Hacl_Impl_Ed25519_Ladder_point_mul(uint64_t *out, uint8_t *scalar, uint64_t uint64_t *t1 = table + 20U; Hacl_Impl_Ed25519_PointConstants_make_point_inf(t0); memcpy(t1, q, 20U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 20U; - Hacl_Impl_Ed25519_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, t11, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(tmp, p_copy0); memcpy(table + (2U * i + 2U) * 20U, tmp, 20U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 20U; - Hacl_Impl_Ed25519_PointAdd_point_add(tmp, q, t2); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, q, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(tmp, p_copy, t2); memcpy(table + (2U * i + 3U) * 20U, tmp, 20U * sizeof (uint64_t));); Hacl_Impl_Ed25519_PointConstants_make_point_inf(out); uint64_t tmp0[20U] = { 0U }; for (uint32_t i0 = 0U; i0 < 64U; i0++) { - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, Hacl_Impl_Ed25519_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(out, p_copy);); uint32_t k = 256U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, bscalar, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 20U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -1284,11 +1330,13 @@ void Hacl_Impl_Ed25519_Ladder_point_mul(uint64_t *out, uint8_t *scalar, uint64_t const uint64_t *res_j = table + (i1 + 1U) * 20U; for (uint32_t i 
= 0U; i < 20U; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy, tmp0); } } @@ -1303,8 +1351,8 @@ static inline void precomp_get_consttime(const uint64_t *table, uint64_t bits_l, const uint64_t *res_j = table + (i0 + 1U) * 20U; for (uint32_t i = 0U; i < 20U; i++) { - uint64_t *os = tmp; uint64_t x = (c & res_j[i]) | (~c & tmp[i]); + uint64_t *os = tmp; os[i] = x; }); } @@ -1316,11 +1364,11 @@ static inline void point_mul_g(uint64_t *out, uint8_t *scalar) 0U, 4U, 1U, - uint64_t *os = bscalar; uint8_t *bj = scalar + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar; os[i] = x;); uint64_t q1[20U] = { 0U }; uint64_t *gx = q1; @@ -1384,23 +1432,41 @@ static inline void point_mul_g(uint64_t *out, uint8_t *scalar) 0U, 16U, 1U, - KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, Hacl_Impl_Ed25519_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i0, + 0U, + 4U, + 1U, + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(out, p_copy);); uint32_t k = 64U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r4, k, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_g_pow2_192_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy, tmp); uint32_t k0 = 64U - 4U * i - 4U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r3, k0, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_g_pow2_128_table_w4); 
precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy0, tmp); uint32_t k1 = 64U - 4U * i - 4U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r2, k1, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_g_pow2_64_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp); + uint64_t p_copy1[20U] = { 0U }; + memcpy(p_copy1, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy1, tmp); uint32_t k2 = 64U - 4U * i - 4U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r1, k2, 4U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w4); precomp_get_consttime(Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp);); + uint64_t p_copy2[20U] = { 0U }; + memcpy(p_copy2, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy2, tmp);); KRML_MAYBE_UNUSED_VAR(q2); KRML_MAYBE_UNUSED_VAR(q3); KRML_MAYBE_UNUSED_VAR(q4); @@ -1441,21 +1507,21 @@ point_mul_g_double_vartime(uint64_t *out, uint8_t *scalar1, uint8_t *scalar2, ui 0U, 4U, 1U, - uint64_t *os = bscalar1; uint8_t *bj = scalar1 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar1; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = bscalar2; uint8_t *bj = scalar2 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = bscalar2; os[i] = x;); uint64_t table2[640U] = { 0U }; uint64_t tmp1[20U] = { 0U }; @@ -1463,15 +1529,20 @@ point_mul_g_double_vartime(uint64_t *out, uint8_t *scalar1, uint8_t *scalar2, ui uint64_t *t1 = table2 + 20U; 
Hacl_Impl_Ed25519_PointConstants_make_point_inf(t0); memcpy(t1, q2, 20U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 20U; - Hacl_Impl_Ed25519_PointDouble_point_double(tmp1, t11); + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, t11, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(tmp1, p_copy0); memcpy(table2 + (2U * i + 2U) * 20U, tmp1, 20U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 20U; - Hacl_Impl_Ed25519_PointAdd_point_add(tmp1, q2, t2); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, q2, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(tmp1, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 20U, tmp1, 20U * sizeof (uint64_t));); uint64_t tmp10[20U] = { 0U }; uint32_t i0 = 255U; @@ -1486,25 +1557,39 @@ point_mul_g_double_vartime(uint64_t *out, uint8_t *scalar1, uint8_t *scalar2, ui uint32_t bits_l320 = (uint32_t)bits_c0; const uint64_t *a_bits_l0 = table2 + bits_l320 * 20U; memcpy(tmp10, (uint64_t *)a_bits_l0, 20U * sizeof (uint64_t)); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp10); + uint64_t p_copy[20U] = { 0U }; + memcpy(p_copy, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy, tmp10); uint64_t tmp11[20U] = { 0U }; for (uint32_t i = 0U; i < 51U; i++) { - KRML_MAYBE_FOR5(i2, 0U, 5U, 1U, Hacl_Impl_Ed25519_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t p_copy0[20U] = { 0U }; + memcpy(p_copy0, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointDouble_point_double(out, p_copy0);); uint32_t k = 255U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, bscalar2, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l321 = (uint32_t)bits_l; const uint64_t *a_bits_l1 = table2 + bits_l321 * 20U; memcpy(tmp11, (uint64_t *)a_bits_l1, 20U * sizeof (uint64_t)); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp11); + uint64_t 
p_copy0[20U] = { 0U }; + memcpy(p_copy0, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy0, tmp11); uint32_t k0 = 255U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, bscalar1, k0, 5U); + KRML_HOST_IGNORE(Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l322 = (uint32_t)bits_l0; const uint64_t *a_bits_l2 = Hacl_Ed25519_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * 20U; memcpy(tmp11, (uint64_t *)a_bits_l2, 20U * sizeof (uint64_t)); - Hacl_Impl_Ed25519_PointAdd_point_add(out, out, tmp11); + uint64_t p_copy1[20U] = { 0U }; + memcpy(p_copy1, out, 20U * sizeof (uint64_t)); + Hacl_Impl_Ed25519_PointAdd_point_add(out, p_copy1, tmp11); } } @@ -1624,10 +1709,10 @@ static inline void sha512_pre_msg(uint8_t *hash, uint8_t *prefix, uint32_t len, { uint8_t buf[128U] = { 0U }; uint64_t block_state[8U] = { 0U }; + Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 p = s; - Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 *st = &p; Hacl_Streaming_Types_error_code err0 = Hacl_Hash_SHA2_update_512(st, prefix, 32U); Hacl_Streaming_Types_error_code err1 = Hacl_Hash_SHA2_update_512(st, input, len); @@ -1647,10 +1732,10 @@ sha512_pre_pre2_msg( { uint8_t buf[128U] = { 0U }; uint64_t block_state[8U] = { 0U }; + Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 p = s; - Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 *st = &p; Hacl_Streaming_Types_error_code err0 = Hacl_Hash_SHA2_update_512(st, prefix, 32U); Hacl_Streaming_Types_error_code err1 = Hacl_Hash_SHA2_update_512(st, prefix2, 32U); @@ -1734,10 +1819,10 @@ Compute the expanded keys for an Ed25519 signature. 
*/ void Hacl_Ed25519_expand_keys(uint8_t *expanded_keys, uint8_t *private_key) { - uint8_t *public_key = expanded_keys; uint8_t *s_prefix = expanded_keys + 32U; - uint8_t *s = expanded_keys + 32U; secret_expand(s_prefix, private_key); + uint8_t *public_key = expanded_keys; + uint8_t *s = expanded_keys + 32U; point_mul_g_compress(public_key, s); } @@ -1774,8 +1859,12 @@ Hacl_Ed25519_sign_expanded( sha512_modq_pre_pre2(hq, rs, public_key, msg_len, msg); uint64_t aq[5U] = { 0U }; load_32_bytes(aq, s); - mul_modq(aq, hq, aq); - add_modq(aq, rq, aq); + uint64_t y_copy[5U] = { 0U }; + memcpy(y_copy, aq, 5U * sizeof (uint64_t)); + mul_modq(aq, hq, y_copy); + uint64_t y_copy0[5U] = { 0U }; + memcpy(y_copy0, aq, 5U * sizeof (uint64_t)); + add_modq(aq, rq, y_copy0); store_56(ss, aq); } diff --git a/src/Hacl_FFDHE.c b/src/Hacl_FFDHE.c index 098aa607..55f5ce31 100644 --- a/src/Hacl_FFDHE.c +++ b/src/Hacl_FFDHE.c @@ -140,8 +140,8 @@ static inline void ffdhe_precomp_p(Spec_FFDHE_ffdhe_alg a, uint64_t *p_r2_n) uint32_t len = ffdhe_len(a); for (uint32_t i = 0U; i < len; i++) { - uint8_t *os = p_s; uint8_t x = p[i]; + uint8_t *os = p_s; os[i] = x; } Hacl_Bignum_Convert_bn_from_bytes_be_uint64(ffdhe_len(a), p_s, p_n); @@ -158,6 +158,7 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui uint64_t p_n1[nLen]; memset(p_n1, 0U, nLen * sizeof (uint64_t)); uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, p_n[0U], 1ULL, p_n1); + uint64_t c1; if (1U < nLen) { uint64_t *a1 = p_n + 1U; @@ -184,13 +185,14 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, 0ULL, res_i); } - uint64_t c1 = c; - KRML_MAYBE_UNUSED_VAR(c1); + uint64_t c10 = c; + c1 = c10; } else { - KRML_MAYBE_UNUSED_VAR(c0); + c1 = c0; } + KRML_MAYBE_UNUSED_VAR(c1); KRML_CHECK_SIZE(sizeof (uint64_t), nLen); uint64_t b2[nLen]; memset(b2, 0U, nLen * sizeof (uint64_t)); @@ -202,7 
+204,7 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui { uint64_t beq = FStar_UInt64_eq_mask(b2[i], pk_n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b2[i], pk_n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t res = acc0; uint64_t m0 = res; @@ -211,7 +213,7 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui { uint64_t beq = FStar_UInt64_eq_mask(pk_n[i], p_n1[i]); uint64_t blt = ~FStar_UInt64_gte_mask(pk_n[i], p_n1[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; return m0 & m1; @@ -279,8 +281,8 @@ Hacl_FFDHE_ffdhe_secret_to_public_precomp( memset(g_n, 0U, nLen * sizeof (uint64_t)); uint8_t g = 0U; { - uint8_t *os = &g; uint8_t x = Hacl_Impl_FFDHE_Constants_ffdhe_g2[0U]; + uint8_t *os = &g; os[0U] = x; } Hacl_Bignum_Convert_bn_from_bytes_be_uint64(1U, &g, g_n); diff --git a/src/Hacl_Frodo1344.c b/src/Hacl_Frodo1344.c index 9fe78471..33f87629 100644 --- a/src/Hacl_Frodo1344.c +++ b/src/Hacl_Frodo1344.c @@ -210,10 +210,10 @@ uint32_t Hacl_Frodo1344_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t kp_s[32U] = { 0U }; for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x; } uint32_t ss_init_len = 21664U; diff --git a/src/Hacl_Frodo64.c b/src/Hacl_Frodo64.c index 19f1562d..f88c5d63 100644 --- a/src/Hacl_Frodo64.c +++ b/src/Hacl_Frodo64.c @@ -214,10 +214,10 @@ uint32_t Hacl_Frodo64_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) 0U, 16U, 1U, - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x;); uint32_t ss_init_len = 1096U; 
KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); diff --git a/src/Hacl_Frodo640.c b/src/Hacl_Frodo640.c index 8cf0253e..95feeb20 100644 --- a/src/Hacl_Frodo640.c +++ b/src/Hacl_Frodo640.c @@ -212,10 +212,10 @@ uint32_t Hacl_Frodo640_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) 0U, 16U, 1U, - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x;); uint32_t ss_init_len = 9736U; KRML_CHECK_SIZE(sizeof (uint8_t), ss_init_len); diff --git a/src/Hacl_Frodo976.c b/src/Hacl_Frodo976.c index 9360e3af..879fb5b2 100644 --- a/src/Hacl_Frodo976.c +++ b/src/Hacl_Frodo976.c @@ -210,10 +210,10 @@ uint32_t Hacl_Frodo976_crypto_kem_dec(uint8_t *ss, uint8_t *ct, uint8_t *sk) uint8_t kp_s[24U] = { 0U }; for (uint32_t i = 0U; i < 24U; i++) { - uint8_t *os = kp_s; uint8_t uu____0 = s[i]; uint8_t x = (uint32_t)uu____0 ^ ((uint32_t)(uint8_t)mask0 & ((uint32_t)kp[i] ^ (uint32_t)uu____0)); + uint8_t *os = kp_s; os[i] = x; } uint32_t ss_init_len = 15768U; diff --git a/src/Hacl_Frodo_KEM.c b/src/Hacl_Frodo_KEM.c index e0a65a47..f15d57ac 100644 --- a/src/Hacl_Frodo_KEM.c +++ b/src/Hacl_Frodo_KEM.c @@ -30,6 +30,7 @@ void randombytes_(uint32_t len, uint8_t *res) { - Lib_RandomBuffer_System_randombytes(res, len); + bool b = Lib_RandomBuffer_System_randombytes(res, len); + KRML_MAYBE_UNUSED_VAR(b); } diff --git a/src/Hacl_GenericField32.c b/src/Hacl_GenericField32.c index f509e6d4..3e7597bd 100644 --- a/src/Hacl_GenericField32.c +++ b/src/Hacl_GenericField32.c @@ -102,9 +102,9 @@ Deallocate the memory previously allocated by Hacl_GenericField32_field_init. 
*/ void Hacl_GenericField32_field_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t *n = k1.n; - uint32_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t *n = uu____0.n; + uint32_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -117,8 +117,7 @@ Return the size of a modulus `n` in limbs. */ uint32_t Hacl_GenericField32_field_get_len(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - return k1.len; + return (*k).len; } /** @@ -137,8 +136,8 @@ Hacl_GenericField32_to_field( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_to_mont_u32(len1, k1.n, k1.mu, k1.r2, a, aM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_to_mont_u32(len1, uu____0.n, uu____0.mu, uu____0.r2, a, aM); } /** @@ -158,8 +157,8 @@ Hacl_GenericField32_from_field( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, k1.n, k1.mu, aM, a); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, uu____0.n, uu____0.mu, aM, a); } /** @@ -177,8 +176,16 @@ Hacl_GenericField32_add( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_bn_add_mod_n_u32(len1, k1.n, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint32_t), len1); + uint32_t a_copy[len1]; + memset(a_copy, 0U, len1 * sizeof (uint32_t)); + KRML_CHECK_SIZE(sizeof (uint32_t), len1); + uint32_t b_copy[len1]; + memset(b_copy, 0U, len1 * sizeof (uint32_t)); + memcpy(a_copy, aM, len1 * sizeof (uint32_t)); + memcpy(b_copy, bM, len1 * sizeof (uint32_t)); + 
Hacl_Bignum_bn_add_mod_n_u32(len1, uu____0.n, a_copy, b_copy, cM); } /** @@ -196,8 +203,7 @@ Hacl_GenericField32_sub( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_bn_sub_mod_n_u32(len1, k1.n, aM, bM, cM); + Hacl_Bignum_bn_sub_mod_n_u32(len1, (*k).n, aM, bM, cM); } /** @@ -215,8 +221,8 @@ Hacl_GenericField32_mul( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, k1.n, k1.mu, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, uu____0.n, uu____0.mu, aM, bM, cM); } /** @@ -233,8 +239,8 @@ Hacl_GenericField32_sqr( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, k1.n, k1.mu, aM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, uu____0.n, uu____0.mu, aM, cM); } /** @@ -246,8 +252,8 @@ Convert a bignum `one` to its Montgomery representation. 
void Hacl_GenericField32_one(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k, uint32_t *oneM) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, k1.n, k1.mu, k1.r2, oneM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, uu____0.n, uu____0.mu, uu____0.r2, oneM); } /** @@ -278,22 +284,22 @@ Hacl_GenericField32_exp_consttime( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint32_t), k1.len); - uint32_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint32_t)); - memcpy(aMc, aM, k1.len * sizeof (uint32_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint32_t), uu____0.len); + uint32_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint32_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint32_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); uint32_t sw = 0U; uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 32U; @@ -308,9 +314,9 @@ Hacl_GenericField32_exp_consttime( aMc[i] = aMc[i] ^ dummy; } uint32_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, k1.mu, aMc, resM, aMc); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, uu____0.mu, aMc, resM, aMc); uint32_t *ctx_n1 = ctx; - 
Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, k1.mu, resM, resM); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, uu____0.mu, resM, resM); sw = bit; } uint32_t sw0 = sw; @@ -335,8 +341,8 @@ Hacl_GenericField32_exp_consttime( KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); KRML_CHECK_SIZE(sizeof (uint32_t), 16U * len1); uint32_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint32_t)); @@ -347,19 +353,20 @@ Hacl_GenericField32_exp_consttime( uint32_t *t1 = table + len1; uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len1; uint32_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, k1.mu, t11, tmp); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len1; uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -374,8 +381,8 @@ Hacl_GenericField32_exp_consttime( const uint32_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = resM; uint32_t x = (c & res_j[i]) | (~c & resM[i]); + uint32_t *os = resM; os[i] = x; }); } @@ -383,7 +390,7 @@ 
Hacl_GenericField32_exp_consttime( { uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t tmp0[len1]; @@ -395,9 +402,10 @@ Hacl_GenericField32_exp_consttime( 4U, 1U, uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i0 - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint32_t *)(table + 0U * len1), len1 * sizeof (uint32_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -407,12 +415,12 @@ Hacl_GenericField32_exp_consttime( const uint32_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint32_t *os = tmp0; uint32_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint32_t *os = tmp0; os[i] = x; }); uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -445,21 +453,21 @@ Hacl_GenericField32_exp_vartime( ) { uint32_t len1 = Hacl_GenericField32_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint32_t), k1.len); - uint32_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint32_t)); - memcpy(aMc, aM, k1.len * sizeof (uint32_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint32_t), uu____0.len); + uint32_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint32_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint32_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 
* sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 32U; @@ -469,10 +477,10 @@ Hacl_GenericField32_exp_vartime( if (!(bit == 0U)) { uint32_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, k1.mu, resM, aMc, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n0, uu____0.mu, resM, aMc, resM); } uint32_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n0, k1.mu, aMc, aMc); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n0, uu____0.mu, aMc, aMc); } } else @@ -489,8 +497,8 @@ Hacl_GenericField32_exp_vartime( KRML_CHECK_SIZE(sizeof (uint32_t), len1 + len1); uint32_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint32_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint32_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint32_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint32_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint32_t)); KRML_CHECK_SIZE(sizeof (uint32_t), 16U * len1); uint32_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint32_t)); @@ -501,19 +509,20 @@ Hacl_GenericField32_exp_vartime( uint32_t *t1 = table + len1; uint32_t *ctx_n0 = ctx; uint32_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint32_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint32_t *t11 = table + (i + 1U) * len1; uint32_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, k1.mu, t11, tmp); + 
Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint32_t)); uint32_t *t2 = table + (2U * i + 2U) * len1; uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint32_t));); if (bBits % 4U != 0U) { @@ -527,7 +536,7 @@ Hacl_GenericField32_exp_vartime( { uint32_t *ctx_n = ctx; uint32_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u32(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t tmp0[len1]; @@ -539,14 +548,15 @@ Hacl_GenericField32_exp_vartime( 4U, 1U, uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u32(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i - 4U; uint32_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u32(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = bits_l; const uint32_t *a_bits_l = table + bits_l32 * len1; memcpy(tmp0, (uint32_t *)a_bits_l, len1 * sizeof (uint32_t)); uint32_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u32(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -569,16 +579,16 @@ Hacl_GenericField32_inverse( uint32_t *aInvM ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 k1 = *k; - uint32_t len1 = k1.len; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 uu____0 = *k; + uint32_t len1 = uu____0.len; KRML_CHECK_SIZE(sizeof (uint32_t), len1); uint32_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint32_t)); - uint32_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, k1.n[0U], 2U, n2); + uint32_t c0 = 
Lib_IntTypes_Intrinsics_sub_borrow_u32(0U, uu____0.n[0U], 2U, n2); uint32_t c1; if (1U < len1) { - uint32_t *a1 = k1.n + 1U; + uint32_t *a1 = uu____0.n + 1U; uint32_t *res1 = n2 + 1U; uint32_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -610,6 +620,6 @@ Hacl_GenericField32_inverse( c1 = c0; } KRML_MAYBE_UNUSED_VAR(c1); - Hacl_GenericField32_exp_vartime(k, aM, k1.len * 32U, n2, aInvM); + Hacl_GenericField32_exp_vartime(k, aM, uu____0.len * 32U, n2, aInvM); } diff --git a/src/Hacl_GenericField64.c b/src/Hacl_GenericField64.c index 3f291d36..3092ac02 100644 --- a/src/Hacl_GenericField64.c +++ b/src/Hacl_GenericField64.c @@ -101,9 +101,9 @@ Deallocate the memory previously allocated by Hacl_GenericField64_field_init. */ void Hacl_GenericField64_field_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint64_t *n = k1.n; - uint64_t *r2 = k1.r2; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint64_t *n = uu____0.n; + uint64_t *r2 = uu____0.r2; KRML_HOST_FREE(n); KRML_HOST_FREE(r2); KRML_HOST_FREE(k); @@ -116,8 +116,7 @@ Return the size of a modulus `n` in limbs. 
*/ uint32_t Hacl_GenericField64_field_get_len(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - return k1.len; + return (*k).len; } /** @@ -136,8 +135,8 @@ Hacl_GenericField64_to_field( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_to_mont_u64(len1, k1.n, k1.mu, k1.r2, a, aM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_to_mont_u64(len1, uu____0.n, uu____0.mu, uu____0.r2, a, aM); } /** @@ -157,8 +156,8 @@ Hacl_GenericField64_from_field( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, k1.n, k1.mu, aM, a); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, uu____0.n, uu____0.mu, aM, a); } /** @@ -176,8 +175,16 @@ Hacl_GenericField64_add( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_bn_add_mod_n_u64(len1, k1.n, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint64_t), len1); + uint64_t a_copy[len1]; + memset(a_copy, 0U, len1 * sizeof (uint64_t)); + KRML_CHECK_SIZE(sizeof (uint64_t), len1); + uint64_t b_copy[len1]; + memset(b_copy, 0U, len1 * sizeof (uint64_t)); + memcpy(a_copy, aM, len1 * sizeof (uint64_t)); + memcpy(b_copy, bM, len1 * sizeof (uint64_t)); + Hacl_Bignum_bn_add_mod_n_u64(len1, uu____0.n, a_copy, b_copy, cM); } /** @@ -195,8 +202,7 @@ Hacl_GenericField64_sub( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_bn_sub_mod_n_u64(len1, k1.n, aM, bM, cM); + Hacl_Bignum_bn_sub_mod_n_u64(len1, (*k).n, aM, bM, cM); } /** @@ -214,8 +220,8 @@ Hacl_GenericField64_mul( ) { uint32_t len1 = 
Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, k1.n, k1.mu, aM, bM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, uu____0.n, uu____0.mu, aM, bM, cM); } /** @@ -232,8 +238,8 @@ Hacl_GenericField64_sqr( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, k1.n, k1.mu, aM, cM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, uu____0.n, uu____0.mu, aM, cM); } /** @@ -245,8 +251,8 @@ Convert a bignum `one` to its Montgomery representation. void Hacl_GenericField64_one(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 *k, uint64_t *oneM) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, k1.n, k1.mu, k1.r2, oneM); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, uu____0.n, uu____0.mu, uu____0.r2, oneM); } /** @@ -277,22 +283,22 @@ Hacl_GenericField64_exp_consttime( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint64_t), k1.len); - uint64_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint64_t)); - memcpy(aMc, aM, k1.len * sizeof (uint64_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint64_t), uu____0.len); + uint64_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint64_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint64_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * 
sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); uint64_t sw = 0ULL; uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i0 = 0U; i0 < bBits; i0++) { uint32_t i1 = (bBits - i0 - 1U) / 64U; @@ -307,9 +313,9 @@ Hacl_GenericField64_exp_consttime( aMc[i] = aMc[i] ^ dummy; } uint64_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, k1.mu, aMc, resM, aMc); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, uu____0.mu, aMc, resM, aMc); uint64_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, k1.mu, resM, resM); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, uu____0.mu, resM, resM); sw = bit; } uint64_t sw0 = sw; @@ -334,8 +340,8 @@ Hacl_GenericField64_exp_consttime( KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); KRML_CHECK_SIZE(sizeof (uint64_t), 16U * len1); uint64_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint64_t)); @@ -346,19 +352,20 @@ Hacl_GenericField64_exp_consttime( uint64_t *t1 = table + len1; uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len1; uint64_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, k1.mu, t11, tmp); + 
Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len1; uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -373,8 +380,8 @@ Hacl_GenericField64_exp_consttime( const uint64_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = resM; uint64_t x = (c & res_j[i]) | (~c & resM[i]); + uint64_t *os = resM; os[i] = x; }); } @@ -382,7 +389,7 @@ Hacl_GenericField64_exp_consttime( { uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t tmp0[len1]; @@ -394,9 +401,10 @@ Hacl_GenericField64_exp_consttime( 4U, 1U, uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)(table + 0U * len1), len1 * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -406,12 +414,12 @@ Hacl_GenericField64_exp_consttime( const uint64_t *res_j = table + (i1 + 1U) * len1; for (uint32_t i = 0U; i < len1; i++) { - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x; }); uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ 
-444,21 +452,21 @@ Hacl_GenericField64_exp_vartime( ) { uint32_t len1 = Hacl_GenericField64_field_get_len(k); - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - KRML_CHECK_SIZE(sizeof (uint64_t), k1.len); - uint64_t aMc[k1.len]; - memset(aMc, 0U, k1.len * sizeof (uint64_t)); - memcpy(aMc, aM, k1.len * sizeof (uint64_t)); + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + KRML_CHECK_SIZE(sizeof (uint64_t), uu____0.len); + uint64_t aMc[uu____0.len]; + memset(aMc, 0U, uu____0.len * sizeof (uint64_t)); + memcpy(aMc, aM, uu____0.len * sizeof (uint64_t)); if (bBits < 200U) { KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); for (uint32_t i = 0U; i < bBits; i++) { uint32_t i1 = i / 64U; @@ -468,10 +476,10 @@ Hacl_GenericField64_exp_vartime( if (!(bit == 0ULL)) { uint64_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, k1.mu, resM, aMc, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n0, uu____0.mu, resM, aMc, resM); } uint64_t *ctx_n0 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n0, k1.mu, aMc, aMc); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n0, uu____0.mu, aMc, aMc); } } else @@ -488,8 +496,8 @@ Hacl_GenericField64_exp_vartime( KRML_CHECK_SIZE(sizeof (uint64_t), len1 + len1); uint64_t ctx[len1 + len1]; memset(ctx, 0U, (len1 + len1) * sizeof (uint64_t)); - memcpy(ctx, k1.n, len1 * sizeof (uint64_t)); - memcpy(ctx + len1, k1.r2, len1 * sizeof (uint64_t)); + memcpy(ctx, uu____0.n, len1 * sizeof (uint64_t)); + 
memcpy(ctx + len1, uu____0.r2, len1 * sizeof (uint64_t)); KRML_CHECK_SIZE(sizeof (uint64_t), 16U * len1); uint64_t table[16U * len1]; memset(table, 0U, 16U * len1 * sizeof (uint64_t)); @@ -500,19 +508,20 @@ Hacl_GenericField64_exp_vartime( uint64_t *t1 = table + len1; uint64_t *ctx_n0 = ctx; uint64_t *ctx_r20 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, k1.mu, ctx_r20, t0); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n0, uu____0.mu, ctx_r20, t0); memcpy(t1, aMc, len1 * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * len1; uint64_t *ctx_n1 = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, k1.mu, t11, tmp); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n1, uu____0.mu, t11, tmp); memcpy(table + (2U * i + 2U) * len1, tmp, len1 * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * len1; uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, aMc, t2, tmp); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, aMc, t2, tmp); memcpy(table + (2U * i + 3U) * len1, tmp, len1 * sizeof (uint64_t));); if (bBits % 4U != 0U) { @@ -526,7 +535,7 @@ Hacl_GenericField64_exp_vartime( { uint64_t *ctx_n = ctx; uint64_t *ctx_r2 = ctx + len1; - Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, k1.mu, ctx_r2, resM); + Hacl_Bignum_Montgomery_bn_from_mont_u64(len1, ctx_n, uu____0.mu, ctx_r2, resM); } KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t tmp0[len1]; @@ -538,14 +547,15 @@ Hacl_GenericField64_exp_vartime( 4U, 1U, uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, k1.mu, resM, resM);); + Hacl_Bignum_Montgomery_bn_mont_sqr_u64(len1, ctx_n, uu____0.mu, resM, resM);); uint32_t k2 = bBits - bBits % 4U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(bLen, b, k2, 4U); + KRML_MAYBE_UNUSED_VAR(table); uint32_t bits_l32 = (uint32_t)bits_l; const uint64_t *a_bits_l = table + 
bits_l32 * len1; memcpy(tmp0, (uint64_t *)a_bits_l, len1 * sizeof (uint64_t)); uint64_t *ctx_n = ctx; - Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, k1.mu, resM, tmp0, resM); + Hacl_Bignum_Montgomery_bn_mont_mul_u64(len1, ctx_n, uu____0.mu, resM, tmp0, resM); } } } @@ -568,16 +578,16 @@ Hacl_GenericField64_inverse( uint64_t *aInvM ) { - Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 k1 = *k; - uint32_t len1 = k1.len; + Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64 uu____0 = *k; + uint32_t len1 = uu____0.len; KRML_CHECK_SIZE(sizeof (uint64_t), len1); uint64_t n2[len1]; memset(n2, 0U, len1 * sizeof (uint64_t)); - uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, k1.n[0U], 2ULL, n2); + uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, uu____0.n[0U], 2ULL, n2); uint64_t c1; if (1U < len1) { - uint64_t *a1 = k1.n + 1U; + uint64_t *a1 = uu____0.n + 1U; uint64_t *res1 = n2 + 1U; uint64_t c = c0; for (uint32_t i = 0U; i < (len1 - 1U) / 4U; i++) @@ -609,6 +619,6 @@ Hacl_GenericField64_inverse( c1 = c0; } KRML_MAYBE_UNUSED_VAR(c1); - Hacl_GenericField64_exp_vartime(k, aM, k1.len * 64U, n2, aInvM); + Hacl_GenericField64_exp_vartime(k, aM, uu____0.len * 64U, n2, aInvM); } diff --git a/src/Hacl_HKDF.c b/src/Hacl_HKDF.c index 027b719f..be05c08c 100644 --- a/src/Hacl_HKDF.c +++ b/src/Hacl_HKDF.c @@ -51,36 +51,45 @@ Hacl_HKDF_expand_sha2_256( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_compute_sha2_256(tag, prk, 
prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_sha2_256(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_compute_sha2_256(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_sha2_256(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -131,36 +140,45 @@ Hacl_HKDF_expand_sha2_384( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_compute_sha2_384(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_sha2_384(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_compute_sha2_384(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_sha2_384(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, 
tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -211,36 +229,45 @@ Hacl_HKDF_expand_sha2_512( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_compute_sha2_512(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_sha2_512(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_compute_sha2_512(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_sha2_512(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -291,36 +318,45 @@ Hacl_HKDF_expand_blake2s_32( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + 
KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_compute_blake2s_32(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_blake2s_32(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_compute_blake2s_32(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_blake2s_32(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } @@ -371,36 +407,45 @@ Hacl_HKDF_expand_blake2b_32( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_compute_blake2b_32(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_compute_blake2b_32(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_compute_blake2b_32(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof 
(uint8_t)); Hacl_HMAC_compute_blake2b_32(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } diff --git a/src/Hacl_HKDF_Blake2b_256.c b/src/Hacl_HKDF_Blake2b_256.c index fe89115d..82a3ea15 100644 --- a/src/Hacl_HKDF_Blake2b_256.c +++ b/src/Hacl_HKDF_Blake2b_256.c @@ -51,36 +51,45 @@ Hacl_HKDF_Blake2b_256_expand_blake2b_256( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_Blake2b_256_compute_blake2b_256(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_Blake2b_256_compute_blake2b_256(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_Blake2b_256_compute_blake2b_256(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_Blake2b_256_compute_blake2b_256(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } diff --git a/src/Hacl_HKDF_Blake2s_128.c b/src/Hacl_HKDF_Blake2s_128.c index 4c9e9450..879432a4 100644 --- a/src/Hacl_HKDF_Blake2s_128.c +++ b/src/Hacl_HKDF_Blake2s_128.c @@ -51,36 +51,45 @@ 
Hacl_HKDF_Blake2s_128_expand_blake2s_128( KRML_CHECK_SIZE(sizeof (uint8_t), tlen + infolen + 1U); uint8_t text[tlen + infolen + 1U]; memset(text, 0U, (tlen + infolen + 1U) * sizeof (uint8_t)); - uint8_t *text0 = text + tlen; - uint8_t *tag = text; - uint8_t *ctr = text + tlen + infolen; memcpy(text + tlen, info, infolen * sizeof (uint8_t)); + KRML_CHECK_SIZE(sizeof (uint8_t), tlen); + uint8_t tag[tlen]; + memset(tag, 0U, tlen * sizeof (uint8_t)); for (uint32_t i = 0U; i < n; i++) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(i + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (i == 0U) { Hacl_HMAC_Blake2s_128_compute_blake2s_128(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_Blake2s_128_compute_blake2s_128(tag, prk, prklen, text, tlen + infolen + 1U); } memcpy(output + i * tlen, tag, tlen * sizeof (uint8_t)); } if (n * tlen < len) { + uint8_t *ctr = text + tlen + infolen; ctr[0U] = (uint8_t)(n + 1U); + KRML_MAYBE_UNUSED_VAR(text); + uint8_t *text0 = text + tlen; if (n == 0U) { Hacl_HMAC_Blake2s_128_compute_blake2s_128(tag, prk, prklen, text0, infolen + 1U); } else { + memcpy(text, tag, tlen * sizeof (uint8_t)); Hacl_HMAC_Blake2s_128_compute_blake2s_128(tag, prk, prklen, text, tlen + infolen + 1U); } uint8_t *block = okm + n * tlen; memcpy(block, tag, (len - n * tlen) * sizeof (uint8_t)); + return; } } diff --git a/src/Hacl_HMAC.c b/src/Hacl_HMAC.c index b03bc7ac..66e18c5a 100644 --- a/src/Hacl_HMAC.c +++ b/src/Hacl_HMAC.c @@ -26,19 +26,753 @@ #include "internal/Hacl_HMAC.h" #include "internal/Hacl_Krmllib.h" +#include "internal/Hacl_Hash_SHA3.h" #include "internal/Hacl_Hash_SHA2.h" #include "internal/Hacl_Hash_SHA1.h" +#include "internal/Hacl_Hash_MD5.h" #include "internal/Hacl_Hash_Blake2s.h" #include "internal/Hacl_Hash_Blake2b.h" +/** +Write the HMAC-MD5 MAC of a message (`data`) by using a key (`key`) into `dst`. 
+ +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 byte. +`dst` must point to 16 bytes of memory. +*/ +void +Hacl_HMAC_compute_md5( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 16U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_MD5_hash_oneshot(nkey, key, key_len); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t s[4U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U }; + if (data_len == 0U) + { + Hacl_Hash_MD5_update_last(s, 0ULL, ipad, 64U); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_MD5_update_multi(s, ipad, 1U); + Hacl_Hash_MD5_update_multi(s, full_blocks, n_blocks); + Hacl_Hash_MD5_update_last(s, 
(uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + } + uint8_t *dst1 = ipad; + Hacl_Hash_MD5_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_MD5_init(s); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 16U / block_len; + uint32_t rem0 = 16U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 16U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_MD5_update_multi(s, opad, 1U); + Hacl_Hash_MD5_update_multi(s, full_blocks, n_blocks); + Hacl_Hash_MD5_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + Hacl_Hash_MD5_finish(s, dst); +} + /** Write the HMAC-SHA-1 MAC of a message (`data`) by using a key (`key`) into `dst`. -The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 byte. -`dst` must point to 20 bytes of memory. +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 byte. +`dst` must point to 20 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha1( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 20U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA1_hash_oneshot(nkey, key, key_len); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t s[5U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U, 0xc3d2e1f0U }; + if (data_len == 0U) + { + Hacl_Hash_SHA1_update_last(s, 0ULL, ipad, 64U); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA1_update_multi(s, ipad, 1U); + Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); + Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + } + uint8_t *dst1 = ipad; + Hacl_Hash_SHA1_finish(s, dst1); + uint8_t 
*hash1 = ipad; + Hacl_Hash_SHA1_init(s); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 20U / block_len; + uint32_t rem0 = 20U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 20U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA1_update_multi(s, opad, 1U); + Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); + Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + Hacl_Hash_SHA1_finish(s, dst); +} + +/** +Write the HMAC-SHA-2-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 28 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha2_224( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 28U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA2_hash_224(nkey, key, key_len); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t st[8U] = { 0U }; + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint32_t x = Hacl_Hash_SHA2_h224[i]; + uint32_t *os = st; + os[i] = x;); + uint32_t *s = st; + if (data_len == 0U) + { + Hacl_Hash_SHA2_sha224_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA2_sha224_update_nblocks(64U, ipad, s); + Hacl_Hash_SHA2_sha224_update_nblocks(n_blocks * 64U, full_blocks, s); + 
Hacl_Hash_SHA2_sha224_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, + rem_len, + rem, + s); + } + uint8_t *dst1 = ipad; + Hacl_Hash_SHA2_sha224_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_SHA2_sha224_init(s); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 28U / block_len; + uint32_t rem0 = 28U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 28U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA2_sha224_update_nblocks(64U, opad, s); + Hacl_Hash_SHA2_sha224_update_nblocks(n_blocks * 64U, full_blocks, s); + Hacl_Hash_SHA2_sha224_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, + rem_len, + rem, + s); + Hacl_Hash_SHA2_sha224_finish(s, dst); +} + +/** +Write the HMAC-SHA-2-256 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 32 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha2_256( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 32U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA2_hash_256(nkey, key, key_len); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t st[8U] = { 0U }; + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = st; + os[i] = x;); + uint32_t *s = st; + if (data_len == 0U) + { + Hacl_Hash_SHA2_sha256_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA2_sha256_update_nblocks(64U, ipad, s); + Hacl_Hash_SHA2_sha256_update_nblocks(n_blocks * 64U, full_blocks, s); + 
Hacl_Hash_SHA2_sha256_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, + rem_len, + rem, + s); + } + uint8_t *dst1 = ipad; + Hacl_Hash_SHA2_sha256_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_SHA2_sha256_init(s); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 32U / block_len; + uint32_t rem0 = 32U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 32U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA2_sha256_update_nblocks(64U, opad, s); + Hacl_Hash_SHA2_sha256_update_nblocks(n_blocks * 64U, full_blocks, s); + Hacl_Hash_SHA2_sha256_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, + rem_len, + rem, + s); + Hacl_Hash_SHA2_sha256_finish(s, dst); +} + +/** +Write the HMAC-SHA-2-384 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 128 bytes. +`dst` must point to 48 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha2_384( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 128U) + { + ite = key_len; + } + else + { + ite = 48U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 128U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA2_hash_384(nkey, key, key_len); + } + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint64_t st[8U] = { 0U }; + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint64_t x = Hacl_Hash_SHA2_h384[i]; + uint64_t *os = st; + os[i] = x;); + uint64_t *s = st; + if (data_len == 0U) + { + Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), + FStar_UInt128_uint64_to_uint128((uint64_t)128U)), + 128U, + ipad, + s); + } + else + { + uint32_t block_len = 128U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA2_sha384_update_nblocks(128U, ipad, s); + 
Hacl_Hash_SHA2_sha384_update_nblocks(n_blocks * 128U, full_blocks, s); + Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), + FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), + FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), + rem_len, + rem, + s); + } + uint8_t *dst1 = ipad; + Hacl_Hash_SHA2_sha384_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_SHA2_sha384_init(s); + uint32_t block_len = 128U; + uint32_t n_blocks0 = 48U / block_len; + uint32_t rem0 = 48U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 48U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA2_sha384_update_nblocks(128U, opad, s); + Hacl_Hash_SHA2_sha384_update_nblocks(n_blocks * 128U, full_blocks, s); + Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), + FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), + FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), + rem_len, + rem, + s); + Hacl_Hash_SHA2_sha384_finish(s, dst); +} + +/** +Write the HMAC-SHA-2-512 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 128 bytes. +`dst` must point to 64 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha2_512( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 128U) + { + ite = key_len; + } + else + { + ite = 64U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 128U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA2_hash_512(nkey, key, key_len); + } + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint64_t st[8U] = { 0U }; + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint64_t x = Hacl_Hash_SHA2_h512[i]; + uint64_t *os = st; + os[i] = x;); + uint64_t *s = st; + if (data_len == 0U) + { + Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), + FStar_UInt128_uint64_to_uint128((uint64_t)128U)), + 128U, + ipad, + s); + } + else + { + uint32_t block_len = 128U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA2_sha512_update_nblocks(128U, ipad, s); + 
Hacl_Hash_SHA2_sha512_update_nblocks(n_blocks * 128U, full_blocks, s); + Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), + FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), + FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), + rem_len, + rem, + s); + } + uint8_t *dst1 = ipad; + Hacl_Hash_SHA2_sha512_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_SHA2_sha512_init(s); + uint32_t block_len = 128U; + uint32_t n_blocks0 = 64U / block_len; + uint32_t rem0 = 64U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 64U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA2_sha512_update_nblocks(128U, opad, s); + Hacl_Hash_SHA2_sha512_update_nblocks(n_blocks * 128U, full_blocks, s); + Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), + FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), + FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), + rem_len, + rem, + s); + Hacl_Hash_SHA2_sha512_finish(s, dst); +} + +/** +Write the HMAC-SHA-3-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 144 bytes. +`dst` must point to 28 bytes of memory. 
*/ void -Hacl_HMAC_compute_sha1( +Hacl_HMAC_compute_sha3_224( uint8_t *dst, uint8_t *key, uint32_t key_len, @@ -46,57 +780,52 @@ Hacl_HMAC_compute_sha1( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[144U]; + memset(key_block, 0U, 144U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; - if (key_len <= 64U) + if (key_len <= 144U) { ite = key_len; } else { - ite = 20U; + ite = 28U; } uint8_t *zeroes = key_block + ite; KRML_MAYBE_UNUSED_VAR(zeroes); - if (key_len <= 64U) + if (key_len <= 144U) { memcpy(nkey, key, key_len * sizeof (uint8_t)); } else { - Hacl_Hash_SHA1_hash_oneshot(nkey, key, key_len); + Hacl_Hash_SHA3_sha3_224(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[144U]; + memset(ipad, 0x36U, 144U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 144U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[144U]; + memset(opad, 0x5cU, 144U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 144U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } - uint32_t s[5U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U, 0xc3d2e1f0U }; - uint8_t *dst1 = ipad; + uint64_t s[25U] = { 0U }; if (data_len == 0U) { - Hacl_Hash_SHA1_update_last(s, 0ULL, ipad, 64U); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_224, s, ipad, 144U); } else { - uint32_t block_len = 64U; + uint32_t block_len = 144U; uint32_t n_blocks0 = data_len / block_len; uint32_t rem0 = data_len % block_len; K___uint32_t_uint32_t scrut; @@ -114,21 +843,30 @@ Hacl_HMAC_compute_sha1( uint32_t 
full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = data; uint8_t *rem = data + full_blocks_len; - Hacl_Hash_SHA1_update_multi(s, ipad, 1U); - Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); - Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_224, s, rem, rem_len); } - Hacl_Hash_SHA1_finish(s, dst1); + uint8_t *dst1 = ipad; + uint32_t remOut = 28U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 28U - remOut, hbuf0, remOut * sizeof (uint8_t)); uint8_t *hash1 = ipad; - Hacl_Hash_SHA1_init(s); - uint32_t block_len = 64U; - uint32_t n_blocks0 = 20U / block_len; - uint32_t rem0 = 20U % block_len; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 144U; + uint32_t n_blocks0 = 28U / block_len; + uint32_t rem0 = 28U % block_len; K___uint32_t_uint32_t scrut; if (n_blocks0 > 0U && rem0 == 0U) { uint32_t n_blocks_ = n_blocks0 - 1U; - scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 20U - n_blocks_ * block_len }); + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 28U - n_blocks_ * block_len }); } else { @@ -139,20 +877,28 @@ Hacl_HMAC_compute_sha1( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = hash1; uint8_t *rem = hash1 + full_blocks_len; - Hacl_Hash_SHA1_update_multi(s, opad, 1U); - Hacl_Hash_SHA1_update_multi(s, full_blocks, n_blocks); - Hacl_Hash_SHA1_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); - Hacl_Hash_SHA1_finish(s, dst); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, opad, 1U); + 
Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_224, s, rem, rem_len); + uint32_t remOut0 = 28U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 28U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); } /** -Write the HMAC-SHA-2-256 MAC of a message (`data`) by using a key (`key`) into `dst`. +Write the HMAC-SHA-3-256 MAC of a message (`data`) by using a key (`key`) into `dst`. -The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +The key can be any length and will be hashed if it is longer and padded if it is shorter than 136 bytes. `dst` must point to 32 bytes of memory. */ void -Hacl_HMAC_compute_sha2_256( +Hacl_HMAC_compute_sha3_256( uint8_t *dst, uint8_t *key, uint32_t key_len, @@ -160,13 +906,11 @@ Hacl_HMAC_compute_sha2_256( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[136U]; + memset(key_block, 0U, 136U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; - if (key_len <= 64U) + if (key_len <= 136U) { ite = key_len; } @@ -176,49 +920,38 @@ Hacl_HMAC_compute_sha2_256( } uint8_t *zeroes = key_block + ite; KRML_MAYBE_UNUSED_VAR(zeroes); - if (key_len <= 64U) + if (key_len <= 136U) { memcpy(nkey, key, key_len * sizeof (uint8_t)); } else { - Hacl_Hash_SHA2_hash_256(nkey, key, key_len); + Hacl_Hash_SHA3_sha3_256(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[136U]; + memset(ipad, 0x36U, 136U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 136U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; 
ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[136U]; + memset(opad, 0x5cU, 136U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 136U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } - uint32_t st[8U] = { 0U }; - KRML_MAYBE_FOR8(i, - 0U, - 8U, - 1U, - uint32_t *os = st; - uint32_t x = Hacl_Hash_SHA2_h256[i]; - os[i] = x;); - uint32_t *s = st; - uint8_t *dst1 = ipad; + uint64_t s[25U] = { 0U }; if (data_len == 0U) { - Hacl_Hash_SHA2_sha256_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_256, s, ipad, 136U); } else { - uint32_t block_len = 64U; + uint32_t block_len = 136U; uint32_t n_blocks0 = data_len / block_len; uint32_t rem0 = data_len % block_len; K___uint32_t_uint32_t scrut; @@ -236,17 +969,23 @@ Hacl_HMAC_compute_sha2_256( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = data; uint8_t *rem = data + full_blocks_len; - Hacl_Hash_SHA2_sha256_update_nblocks(64U, ipad, s); - Hacl_Hash_SHA2_sha256_update_nblocks(n_blocks * 64U, full_blocks, s); - Hacl_Hash_SHA2_sha256_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, - rem_len, - rem, - s); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_256, s, rem, rem_len); } - Hacl_Hash_SHA2_sha256_finish(s, dst1); + uint8_t *dst1 = ipad; + uint32_t remOut = 32U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 32U - remOut, hbuf0, remOut * sizeof (uint8_t)); uint8_t *hash1 = ipad; - 
Hacl_Hash_SHA2_sha256_init(s); - uint32_t block_len = 64U; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 136U; uint32_t n_blocks0 = 32U / block_len; uint32_t rem0 = 32U % block_len; K___uint32_t_uint32_t scrut; @@ -264,23 +1003,28 @@ Hacl_HMAC_compute_sha2_256( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = hash1; uint8_t *rem = hash1 + full_blocks_len; - Hacl_Hash_SHA2_sha256_update_nblocks(64U, opad, s); - Hacl_Hash_SHA2_sha256_update_nblocks(n_blocks * 64U, full_blocks, s); - Hacl_Hash_SHA2_sha256_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, - rem_len, - rem, - s); - Hacl_Hash_SHA2_sha256_finish(s, dst); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_256, s, rem, rem_len); + uint32_t remOut0 = 32U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 32U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); } /** -Write the HMAC-SHA-2-384 MAC of a message (`data`) by using a key (`key`) into `dst`. +Write the HMAC-SHA-3-384 MAC of a message (`data`) by using a key (`key`) into `dst`. -The key can be any length and will be hashed if it is longer and padded if it is shorter than 128 bytes. +The key can be any length and will be hashed if it is longer and padded if it is shorter than 104 bytes. `dst` must point to 48 bytes of memory. 
*/ void -Hacl_HMAC_compute_sha2_384( +Hacl_HMAC_compute_sha3_384( uint8_t *dst, uint8_t *key, uint32_t key_len, @@ -288,13 +1032,11 @@ Hacl_HMAC_compute_sha2_384( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[104U]; + memset(key_block, 0U, 104U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; - if (key_len <= 128U) + if (key_len <= 104U) { ite = key_len; } @@ -304,53 +1046,38 @@ Hacl_HMAC_compute_sha2_384( } uint8_t *zeroes = key_block + ite; KRML_MAYBE_UNUSED_VAR(zeroes); - if (key_len <= 128U) + if (key_len <= 104U) { memcpy(nkey, key, key_len * sizeof (uint8_t)); } else { - Hacl_Hash_SHA2_hash_384(nkey, key, key_len); + Hacl_Hash_SHA3_sha3_384(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[104U]; + memset(ipad, 0x36U, 104U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 104U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[104U]; + memset(opad, 0x5cU, 104U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 104U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } - uint64_t st[8U] = { 0U }; - KRML_MAYBE_FOR8(i, - 0U, - 8U, - 1U, - uint64_t *os = st; - uint64_t x = Hacl_Hash_SHA2_h384[i]; - os[i] = x;); - uint64_t *s = st; - uint8_t *dst1 = ipad; + uint64_t s[25U] = { 0U }; if (data_len == 0U) { - Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), - FStar_UInt128_uint64_to_uint128((uint64_t)128U)), - 128U, - ipad, - s); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_384, s, ipad, 104U); } else { - 
uint32_t block_len = 128U; + uint32_t block_len = 104U; uint32_t n_blocks0 = data_len / block_len; uint32_t rem0 = data_len % block_len; K___uint32_t_uint32_t scrut; @@ -368,19 +1095,23 @@ Hacl_HMAC_compute_sha2_384( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = data; uint8_t *rem = data + full_blocks_len; - Hacl_Hash_SHA2_sha384_update_nblocks(128U, ipad, s); - Hacl_Hash_SHA2_sha384_update_nblocks(n_blocks * 128U, full_blocks, s); - Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), - FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), - FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), - rem_len, - rem, - s); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_384, s, rem, rem_len); } - Hacl_Hash_SHA2_sha384_finish(s, dst1); + uint8_t *dst1 = ipad; + uint32_t remOut = 48U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 48U - remOut, hbuf0, remOut * sizeof (uint8_t)); uint8_t *hash1 = ipad; - Hacl_Hash_SHA2_sha384_init(s); - uint32_t block_len = 128U; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 104U; uint32_t n_blocks0 = 48U / block_len; uint32_t rem0 = 48U % block_len; K___uint32_t_uint32_t scrut; @@ -398,25 +1129,28 @@ Hacl_HMAC_compute_sha2_384( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = hash1; uint8_t *rem = hash1 + full_blocks_len; - Hacl_Hash_SHA2_sha384_update_nblocks(128U, opad, s); - Hacl_Hash_SHA2_sha384_update_nblocks(n_blocks * 128U, full_blocks, s); - 
Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), - FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), - FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), - rem_len, - rem, - s); - Hacl_Hash_SHA2_sha384_finish(s, dst); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_384, s, rem, rem_len); + uint32_t remOut0 = 48U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 48U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); } /** -Write the HMAC-SHA-2-512 MAC of a message (`data`) by using a key (`key`) into `dst`. +Write the HMAC-SHA-3-512 MAC of a message (`data`) by using a key (`key`) into `dst`. -The key can be any length and will be hashed if it is longer and padded if it is shorter than 128 bytes. +The key can be any length and will be hashed if it is longer and padded if it is shorter than 72 bytes. `dst` must point to 64 bytes of memory. 
*/ void -Hacl_HMAC_compute_sha2_512( +Hacl_HMAC_compute_sha3_512( uint8_t *dst, uint8_t *key, uint32_t key_len, @@ -424,13 +1158,11 @@ Hacl_HMAC_compute_sha2_512( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[72U]; + memset(key_block, 0U, 72U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; - if (key_len <= 128U) + if (key_len <= 72U) { ite = key_len; } @@ -440,53 +1172,38 @@ Hacl_HMAC_compute_sha2_512( } uint8_t *zeroes = key_block + ite; KRML_MAYBE_UNUSED_VAR(zeroes); - if (key_len <= 128U) + if (key_len <= 72U) { memcpy(nkey, key, key_len * sizeof (uint8_t)); } else { - Hacl_Hash_SHA2_hash_512(nkey, key, key_len); + Hacl_Hash_SHA3_sha3_512(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[72U]; + memset(ipad, 0x36U, 72U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 72U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[72U]; + memset(opad, 0x5cU, 72U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 72U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } - uint64_t st[8U] = { 0U }; - KRML_MAYBE_FOR8(i, - 0U, - 8U, - 1U, - uint64_t *os = st; - uint64_t x = Hacl_Hash_SHA2_h512[i]; - os[i] = x;); - uint64_t *s = st; - uint8_t *dst1 = ipad; + uint64_t s[25U] = { 0U }; if (data_len == 0U) { - Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(0ULL), - FStar_UInt128_uint64_to_uint128((uint64_t)128U)), - 128U, - ipad, - s); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_512, s, ipad, 72U); } else { - uint32_t 
block_len = 128U; + uint32_t block_len = 72U; uint32_t n_blocks0 = data_len / block_len; uint32_t rem0 = data_len % block_len; K___uint32_t_uint32_t scrut; @@ -504,19 +1221,23 @@ Hacl_HMAC_compute_sha2_512( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = data; uint8_t *rem = data + full_blocks_len; - Hacl_Hash_SHA2_sha512_update_nblocks(128U, ipad, s); - Hacl_Hash_SHA2_sha512_update_nblocks(n_blocks * 128U, full_blocks, s); - Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), - FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), - FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), - rem_len, - rem, - s); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_512, s, rem, rem_len); } - Hacl_Hash_SHA2_sha512_finish(s, dst1); + uint8_t *dst1 = ipad; + uint32_t remOut = 64U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 64U - remOut, hbuf0, remOut * sizeof (uint8_t)); uint8_t *hash1 = ipad; - Hacl_Hash_SHA2_sha512_init(s); - uint32_t block_len = 128U; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 72U; uint32_t n_blocks0 = 64U / block_len; uint32_t rem0 = 64U % block_len; K___uint32_t_uint32_t scrut; @@ -534,15 +1255,18 @@ Hacl_HMAC_compute_sha2_512( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = hash1; uint8_t *rem = hash1 + full_blocks_len; - Hacl_Hash_SHA2_sha512_update_nblocks(128U, opad, s); - Hacl_Hash_SHA2_sha512_update_nblocks(n_blocks * 128U, full_blocks, s); - Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), - 
FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), - FStar_UInt128_uint64_to_uint128((uint64_t)rem_len)), - rem_len, - rem, - s); - Hacl_Hash_SHA2_sha512_finish(s, dst); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_512, s, rem, rem_len); + uint32_t remOut0 = 64U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 64U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); } /** @@ -560,10 +1284,8 @@ Hacl_HMAC_compute_blake2s_32( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -584,19 +1306,17 @@ Hacl_HMAC_compute_blake2s_32( { Hacl_Hash_Blake2s_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -605,11 +1325,10 @@ Hacl_HMAC_compute_blake2s_32( uint32_t s[16U] = { 0U }; Hacl_Hash_Blake2s_init(s, 0U, 32U); uint32_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { 
uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -644,10 +1363,12 @@ Hacl_HMAC_compute_blake2s_32( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2s_finish(32U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2s_init(s0, 0U, 32U); @@ -682,6 +1403,7 @@ Hacl_HMAC_compute_blake2s_32( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -703,10 +1425,8 @@ Hacl_HMAC_compute_blake2b_32( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -727,19 +1447,17 @@ Hacl_HMAC_compute_blake2b_32( { Hacl_Hash_Blake2b_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -748,11 +1466,16 @@ Hacl_HMAC_compute_blake2b_32( uint64_t s[16U] = { 0U }; Hacl_Hash_Blake2b_init(s, 0U, 64U); uint64_t *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, 
FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -787,11 +1510,13 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2b_finish(64U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2b_init(s0, 0U, 64U); @@ -826,6 +1551,7 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/Hacl_HMAC_Blake2b_256.c b/src/Hacl_HMAC_Blake2b_256.c index 6197490a..9be9fe7f 100644 --- a/src/Hacl_HMAC_Blake2b_256.c +++ b/src/Hacl_HMAC_Blake2b_256.c @@ -44,10 +44,8 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -68,19 +66,17 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( { Hacl_Hash_Blake2b_Simd256_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 
128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -89,13 +85,13 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 s[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_init(s, 0U, 64U); Lib_IntVector_Intrinsics_vec256 *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 wv[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_update_last(128U, wv, s0, + false, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); @@ -138,11 +134,13 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2b_Simd256_finish(64U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2b_Simd256_init(s0, 0U, 64U); @@ -182,6 +180,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/Hacl_HMAC_Blake2s_128.c b/src/Hacl_HMAC_Blake2s_128.c index 0741bffb..76cc2b62 100644 --- a/src/Hacl_HMAC_Blake2s_128.c +++ b/src/Hacl_HMAC_Blake2s_128.c @@ -43,10 +43,8 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t key_block[l]; - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -67,19 +65,17 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( { Hacl_Hash_Blake2s_Simd128_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t 
ipad[l]; - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t opad[l]; - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -88,11 +84,10 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 s[4U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Hash_Blake2s_Simd128_init(s, 0U, 32U); Lib_IntVector_Intrinsics_vec128 *s0 = s; - uint8_t *dst1 = ipad; if (data_len == 0U) { KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -127,10 +122,12 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); } + uint8_t *dst1 = ipad; Hacl_Hash_Blake2s_Simd128_finish(32U, dst1, s0); uint8_t *hash1 = ipad; Hacl_Hash_Blake2s_Simd128_init(s0, 0U, 32U); @@ -165,6 +162,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); diff --git a/src/Hacl_Hash_Blake2b.c b/src/Hacl_Hash_Blake2b.c index d490a1a5..980b9997 100644 --- a/src/Hacl_Hash_Blake2b.c +++ b/src/Hacl_Hash_Blake2b.c @@ -29,18 +29,25 @@ #include "lib_memzero0.h" static void -update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totlen, uint8_t *d) +update_block( + uint64_t *wv, + uint64_t *hash, + bool 
flag, + bool last_node, + FStar_UInt128_uint128 totlen, + uint8_t *d +) { uint64_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint64_t *os = m_w; uint8_t *bj = d + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = m_w; os[i] = x;); uint64_t mask[4U] = { 0U }; uint64_t wv_14; @@ -52,7 +59,15 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask[0U] = FStar_UInt128_uint128_to_uint64(totlen); mask[1U] = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)); mask[2U] = wv_14; @@ -63,8 +78,8 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl 0U, 4U, 1U, - uint64_t *os = wv3; uint64_t x = wv3[i] ^ mask[i]; + uint64_t *os = wv3; os[i] = x;); KRML_MAYBE_FOR12(i0, 0U, @@ -124,131 +139,127 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl uint64_t *y = m_st + 4U; uint64_t *z = m_st + 8U; uint64_t *w = m_st + 12U; - uint32_t a = 0U; - uint32_t b0 = 1U; - uint32_t c0 = 2U; - uint32_t d10 = 3U; - uint64_t *wv_a0 = wv + a * 4U; - uint64_t *wv_b0 = wv + b0 * 4U; + uint64_t *wv_a = wv; + uint64_t *wv_b0 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a0; - uint64_t x1 = wv_a0[i] + wv_b0[i]; + uint64_t x1 = wv_a[i] + wv_b0[i]; + uint64_t *os = wv_a; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a0; - uint64_t x1 = wv_a0[i] + x[i]; + uint64_t x1 = wv_a[i] + x[i]; + uint64_t *os = wv_a; os[i] = x1;); - uint64_t *wv_a1 = wv + d10 * 4U; - uint64_t *wv_b1 = wv + a * 4U; + uint64_t *wv_a0 = wv + 12U; + uint64_t *wv_b1 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a1; - uint64_t x1 = wv_a1[i] ^ wv_b1[i]; + uint64_t x1 = wv_a0[i] ^ wv_b1[i]; + uint64_t *os = wv_a0; os[i] = x1;); - uint64_t *r10 = wv_a1; + uint64_t *r10 = wv_a0; 
KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r10; uint64_t x1 = r10[i]; uint64_t x10 = x1 >> 32U | x1 << 32U; + uint64_t *os = r10; os[i] = x10;); - uint64_t *wv_a2 = wv + c0 * 4U; - uint64_t *wv_b2 = wv + d10 * 4U; + uint64_t *wv_a1 = wv + 8U; + uint64_t *wv_b2 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a2; - uint64_t x1 = wv_a2[i] + wv_b2[i]; + uint64_t x1 = wv_a1[i] + wv_b2[i]; + uint64_t *os = wv_a1; os[i] = x1;); - uint64_t *wv_a3 = wv + b0 * 4U; - uint64_t *wv_b3 = wv + c0 * 4U; + uint64_t *wv_a2 = wv + 4U; + uint64_t *wv_b3 = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a3; - uint64_t x1 = wv_a3[i] ^ wv_b3[i]; + uint64_t x1 = wv_a2[i] ^ wv_b3[i]; + uint64_t *os = wv_a2; os[i] = x1;); - uint64_t *r12 = wv_a3; + uint64_t *r12 = wv_a2; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r12; uint64_t x1 = r12[i]; uint64_t x10 = x1 >> 24U | x1 << 40U; + uint64_t *os = r12; os[i] = x10;); - uint64_t *wv_a4 = wv + a * 4U; - uint64_t *wv_b4 = wv + b0 * 4U; + uint64_t *wv_a3 = wv; + uint64_t *wv_b4 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a4; - uint64_t x1 = wv_a4[i] + wv_b4[i]; + uint64_t x1 = wv_a3[i] + wv_b4[i]; + uint64_t *os = wv_a3; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a4; - uint64_t x1 = wv_a4[i] + y[i]; + uint64_t x1 = wv_a3[i] + y[i]; + uint64_t *os = wv_a3; os[i] = x1;); - uint64_t *wv_a5 = wv + d10 * 4U; - uint64_t *wv_b5 = wv + a * 4U; + uint64_t *wv_a4 = wv + 12U; + uint64_t *wv_b5 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a5; - uint64_t x1 = wv_a5[i] ^ wv_b5[i]; + uint64_t x1 = wv_a4[i] ^ wv_b5[i]; + uint64_t *os = wv_a4; os[i] = x1;); - uint64_t *r13 = wv_a5; + uint64_t *r13 = wv_a4; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r13; uint64_t x1 = r13[i]; uint64_t x10 = x1 >> 16U | x1 << 48U; + uint64_t *os = r13; os[i] = x10;); - uint64_t *wv_a6 = wv + c0 * 4U; - uint64_t *wv_b6 = wv + d10 * 4U; + uint64_t *wv_a5 = wv + 8U; + uint64_t *wv_b6 = wv + 
12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a6; - uint64_t x1 = wv_a6[i] + wv_b6[i]; + uint64_t x1 = wv_a5[i] + wv_b6[i]; + uint64_t *os = wv_a5; os[i] = x1;); - uint64_t *wv_a7 = wv + b0 * 4U; - uint64_t *wv_b7 = wv + c0 * 4U; + uint64_t *wv_a6 = wv + 4U; + uint64_t *wv_b7 = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a7; - uint64_t x1 = wv_a7[i] ^ wv_b7[i]; + uint64_t x1 = wv_a6[i] ^ wv_b7[i]; + uint64_t *os = wv_a6; os[i] = x1;); - uint64_t *r14 = wv_a7; + uint64_t *r14 = wv_a6; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r14; uint64_t x1 = r14[i]; uint64_t x10 = x1 >> 63U | x1 << 1U; + uint64_t *os = r14; os[i] = x10;); uint64_t *r15 = wv + 4U; uint64_t *r21 = wv + 8U; @@ -280,131 +291,127 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl r112[1U] = x12; r112[2U] = x22; r112[3U] = x32; - uint32_t a0 = 0U; - uint32_t b = 1U; - uint32_t c = 2U; - uint32_t d1 = 3U; - uint64_t *wv_a = wv + a0 * 4U; - uint64_t *wv_b8 = wv + b * 4U; + uint64_t *wv_a7 = wv; + uint64_t *wv_b8 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a; - uint64_t x1 = wv_a[i] + wv_b8[i]; + uint64_t x1 = wv_a7[i] + wv_b8[i]; + uint64_t *os = wv_a7; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a; - uint64_t x1 = wv_a[i] + z[i]; + uint64_t x1 = wv_a7[i] + z[i]; + uint64_t *os = wv_a7; os[i] = x1;); - uint64_t *wv_a8 = wv + d1 * 4U; - uint64_t *wv_b9 = wv + a0 * 4U; + uint64_t *wv_a8 = wv + 12U; + uint64_t *wv_b9 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a8; uint64_t x1 = wv_a8[i] ^ wv_b9[i]; + uint64_t *os = wv_a8; os[i] = x1;); uint64_t *r16 = wv_a8; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r16; uint64_t x1 = r16[i]; uint64_t x13 = x1 >> 32U | x1 << 32U; + uint64_t *os = r16; os[i] = x13;); - uint64_t *wv_a9 = wv + c * 4U; - uint64_t *wv_b10 = wv + d1 * 4U; + uint64_t *wv_a9 = wv + 8U; + uint64_t *wv_b10 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a9; 
uint64_t x1 = wv_a9[i] + wv_b10[i]; + uint64_t *os = wv_a9; os[i] = x1;); - uint64_t *wv_a10 = wv + b * 4U; - uint64_t *wv_b11 = wv + c * 4U; + uint64_t *wv_a10 = wv + 4U; + uint64_t *wv_b11 = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a10; uint64_t x1 = wv_a10[i] ^ wv_b11[i]; + uint64_t *os = wv_a10; os[i] = x1;); uint64_t *r17 = wv_a10; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r17; uint64_t x1 = r17[i]; uint64_t x13 = x1 >> 24U | x1 << 40U; + uint64_t *os = r17; os[i] = x13;); - uint64_t *wv_a11 = wv + a0 * 4U; - uint64_t *wv_b12 = wv + b * 4U; + uint64_t *wv_a11 = wv; + uint64_t *wv_b12 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a11; uint64_t x1 = wv_a11[i] + wv_b12[i]; + uint64_t *os = wv_a11; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a11; uint64_t x1 = wv_a11[i] + w[i]; + uint64_t *os = wv_a11; os[i] = x1;); - uint64_t *wv_a12 = wv + d1 * 4U; - uint64_t *wv_b13 = wv + a0 * 4U; + uint64_t *wv_a12 = wv + 12U; + uint64_t *wv_b13 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a12; uint64_t x1 = wv_a12[i] ^ wv_b13[i]; + uint64_t *os = wv_a12; os[i] = x1;); uint64_t *r18 = wv_a12; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r18; uint64_t x1 = r18[i]; uint64_t x13 = x1 >> 16U | x1 << 48U; + uint64_t *os = r18; os[i] = x13;); - uint64_t *wv_a13 = wv + c * 4U; - uint64_t *wv_b14 = wv + d1 * 4U; + uint64_t *wv_a13 = wv + 8U; + uint64_t *wv_b14 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a13; uint64_t x1 = wv_a13[i] + wv_b14[i]; + uint64_t *os = wv_a13; os[i] = x1;); - uint64_t *wv_a14 = wv + b * 4U; - uint64_t *wv_b = wv + c * 4U; + uint64_t *wv_a14 = wv + 4U; + uint64_t *wv_b = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = wv_a14; uint64_t x1 = wv_a14[i] ^ wv_b[i]; + uint64_t *os = wv_a14; os[i] = x1;); uint64_t *r19 = wv_a14; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = r19; uint64_t x1 = r19[i]; uint64_t x13 = x1 >> 63U | x1 << 1U; + uint64_t *os = r19; 
os[i] = x13;); uint64_t *r113 = wv + 4U; uint64_t *r2 = wv + 8U; @@ -446,29 +453,29 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl 0U, 4U, 1U, - uint64_t *os = s0; uint64_t x = s0[i] ^ r0[i]; + uint64_t *os = s0; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = s0; uint64_t x = s0[i] ^ r2[i]; + uint64_t *os = s0; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = s1; uint64_t x = s1[i] ^ r1[i]; + uint64_t *os = s1; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = s1; uint64_t x = s1[i] ^ r3[i]; + uint64_t *os = s1; os[i] = x;); } @@ -505,25 +512,27 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r3[3U] = iv7; uint8_t kk1 = (uint8_t)kk; uint8_t nn1 = (uint8_t)nn; + uint64_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 4U; uint8_t *bj = p.salt + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____0; os[i] = x;); + uint64_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 6U; uint8_t *bj = p.personal + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____1; os[i] = x;); tmp[0U] = (uint64_t)nn1 @@ -560,86 +569,6 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - uint64_t *r0 = hash; - uint64_t *r1 = hash + 4U; - uint64_t *r2 = hash + 8U; - uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - 
r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); @@ -647,11 +576,11 @@ static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -674,7 +603,7 @@ Hacl_Hash_Blake2b_update_multi( 
FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -683,6 +612,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -693,7 +623,7 @@ Hacl_Hash_Blake2b_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -727,7 +657,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -756,22 +686,115 @@ void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) uint64_t *row1 = hash + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store64_le(first + i * 8U, row0[i]);); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store64_le(second + i * 8U, row1[i]);); + KRML_MAYBE_UNUSED_VAR(b); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } static Hacl_Hash_Blake2b_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { 
.fst = kk.key_length, .snd = kk.digest_length, .thd = kk.last_node, .f3 = wv, .f4 = b }; + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint64_t *h = block_state.f4; + uint32_t kk20 = (uint32_t)i0.key_length; + uint8_t *k_ = key.snd; + if (!(kk20 == 0U)) + { + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_, kk20 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + uint64_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + uint64_t *os = uu____0; + os[i] = x;); + uint64_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + uint64_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = 
(uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -785,23 +808,9 @@ static Hacl_Hash_Blake2b_state_t Hacl_Hash_Blake2b_state_t s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2b_state_t - *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); - p[0U] = s; - Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; - uint8_t kk1 = p1->key_length; - uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i.key_length; - uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) - { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); - } - Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); - return p; + *p0 = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); + p0[0U] = s; + return p0; } /** @@ -820,14 +829,16 @@ The caller must satisfy the following requirements. 
*/ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -844,7 +855,7 @@ The caller must satisfy the following requirements. Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; uint8_t salt[16U] = { 0U }; uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params @@ -855,7 +866,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, k); + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, false, k); return s; } @@ -872,39 +883,117 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) { Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static 
void -reset_raw( - Hacl_Hash_Blake2b_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { - Hacl_Hash_Blake2b_state_t scrut = *state; - uint8_t *buf = scrut.buf; - Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + Hacl_Hash_Blake2b_block_state_t block_state = (*state).block_state; + uint8_t *buf = (*state).buf; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; - KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i1.key_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint64_t *h = block_state.f4; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); - uint8_t kk11 = i.key_length; + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = 
Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + uint64_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + uint64_t *os = uu____0; + os[i] = x;); + uint64_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + uint64_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + uint8_t kk11 = i0.key_length; uint32_t ite; if (kk11 != 0U) { @@ -914,14 +1003,13 @@ reset_raw( { ite = 0U; } - Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)ite; + state->total_len = total_len; } /** General-purpose 
re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. @@ -933,8 +1021,9 @@ Hacl_Hash_Blake2b_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -957,7 +1046,7 @@ void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** @@ -979,8 +1068,8 @@ void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s) Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Hash_Blake2b_state_t s = *state; - uint64_t total_len = s.total_len; + Hacl_Hash_Blake2b_block_state_t block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -996,10 +1085,8 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 } if (chunk_len <= 128U - sz) { - Hacl_Hash_Blake2b_state_t s1 = *state; - Hacl_Hash_Blake2b_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = 
(*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -1012,22 +1099,12 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Hash_Blake2b_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Hash_Blake2b_state_t s1 = *state; - Hacl_Hash_Blake2b_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -1040,9 +1117,8 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; - uint64_t *wv = acc.fst; - uint64_t *hash = acc.snd; + uint64_t *hash = block_state.f4; + uint64_t *wv = block_state.f3; uint32_t nb = 1U; Hacl_Hash_Blake2b_update_multi(128U, wv, @@ -1065,9 +1141,8 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; - uint64_t *wv = acc.fst; - uint64_t *hash = acc.snd; + uint64_t *hash = block_state.f4; + uint64_t *wv = block_state.f3; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_update_multi(data1_len, wv, @@ -1077,25 +1152,15 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2b_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = 
total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 128U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Hash_Blake2b_state_t s1 = *state; - Hacl_Hash_Blake2b_block_state_t block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)128U == 0ULL && total_len10 > 0ULL) { @@ -1105,22 +1170,12 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 { sz10 = (uint32_t)(total_len10 % (uint64_t)128U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Hash_Blake2b_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Hash_Blake2b_state_t s10 = *state; - Hacl_Hash_Blake2b_block_state_t block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -1133,15 +1188,14 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; - uint64_t *wv = acc.fst; - uint64_t *hash = acc.snd; + uint64_t *hash = block_state.f4; + uint64_t *wv = block_state.f3; uint32_t nb = 1U; Hacl_Hash_Blake2b_update_multi(128U, wv, hash, FStar_UInt128_uint64_to_uint128(prevlen), - buf, + buf0, nb); } uint32_t ite; @@ -1159,9 +1213,8 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; 
uint8_t *data2 = chunk2 + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; - uint64_t *wv = acc.fst; - uint64_t *hash = acc.snd; + uint64_t *hash = block_state.f4; + uint64_t *wv = block_state.f3; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_update_multi(data1_len, wv, @@ -1169,17 +1222,9 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 FStar_UInt128_uint64_to_uint128(total_len1), data1, nb); - uint8_t *dst = buf; + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2b_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } @@ -1190,19 +1235,22 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
*/ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_state_t scrut = *state; - Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + Hacl_Hash_Blake2b_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + uint8_t *buf_ = (*s).buf; + uint64_t total_len = (*s).total_len; uint32_t r; if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) { @@ -1217,11 +1265,12 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) uint64_t b[16U] = { 0U }; Hacl_Hash_Blake2b_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint64_t *src_b = block_state.thd.snd; - uint64_t *dst_b = tmp_block_state.thd.snd; + { .fst = i1.key_length, .snd = i1.digest_length, .thd = i1.last_node, .f3 = wv0, .f4 = b }; + uint64_t *src_b = block_state.f4; + uint64_t *dst_b = tmp_block_state.f4; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 128U == 0U && r > 0U) { @@ -1232,10 +1281,8 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) ite = r % 128U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; - K____uint64_t___uint64_t_ acc0 = tmp_block_state.thd; - uint64_t *wv1 = 
acc0.fst; - uint64_t *hash0 = acc0.snd; + uint64_t *hash0 = tmp_block_state.f4; + uint64_t *wv1 = tmp_block_state.f3; uint32_t nb = 0U; Hacl_Hash_Blake2b_update_multi(0U, wv1, @@ -1244,17 +1291,34 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint64_t___uint64_t_ acc = tmp_block_state.thd; - uint64_t *wv = acc.fst; - uint64_t *hash = acc.snd; + uint64_t *hash = tmp_block_state.f4; + uint64_t *wv = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Hacl_Hash_Blake2b_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn1, dst, tmp_block_state.f4); + Hacl_Hash_Blake2b_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1265,8 +1329,8 @@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint64_t *b = block_state.thd.snd; - uint64_t *wv = block_state.thd.fst; + uint64_t *b = block_state.f4; + uint64_t *wv = block_state.f3; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1278,21 +1342,22 
@@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) */ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state) { - Hacl_Hash_Blake2b_state_t scrut = *state; - Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint64_t *src_b = block_state0.thd.snd; - uint64_t *dst_b = block_state.thd.snd; + block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = i.last_node, .f3 = wv, .f4 = b }; + uint64_t *src_b = block_state0.f4; + uint64_t *dst_b = block_state.f4; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); Hacl_Hash_Blake2b_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1335,10 +1400,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and parameters `params` into `output`. The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. 
*/ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, @@ -1371,25 +1436,27 @@ Hacl_Hash_Blake2b_hash_with_key_and_paramas( r3[3U] = iv7; uint8_t kk = params.key_length; uint8_t nn = params.digest_length; + uint64_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 4U; uint8_t *bj = params.salt + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____0; os[i] = x;); + uint64_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 6U; uint8_t *bj = params.personal + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____1; os[i] = x;); tmp[0U] = (uint64_t)nn diff --git a/src/Hacl_Hash_Blake2b_Simd256.c b/src/Hacl_Hash_Blake2b_Simd256.c index 0afd93bc..fec92c90 100644 --- a/src/Hacl_Hash_Blake2b_Simd256.c +++ b/src/Hacl_Hash_Blake2b_Simd256.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, bool flag, + bool last_node, FStar_UInt128_uint128 totlen, uint8_t *d ) @@ -43,11 +44,11 @@ update_block( 0U, 16U, 1U, - uint64_t *os = m_w; uint8_t *bj = d + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = m_w; os[i] = x;); Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_zero; uint64_t wv_14; @@ -59,7 +60,15 @@ update_block( { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask = Lib_IntVector_Intrinsics_vec256_load64s(FStar_UInt128_uint128_to_uint64(totlen), FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)), @@ -102,40 +111,36 @@ update_block( Lib_IntVector_Intrinsics_vec256 *y = m_st + 1U; Lib_IntVector_Intrinsics_vec256 *z = m_st + 2U; Lib_IntVector_Intrinsics_vec256 *w = m_st + 3U; - uint32_t a = 0U; - uint32_t b0 = 1U; - 
uint32_t c0 = 2U; - uint32_t d10 = 3U; - Lib_IntVector_Intrinsics_vec256 *wv_a0 = wv + a * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b0 = wv + b0 * 1U; - wv_a0[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a0[0U], wv_b0[0U]); - wv_a0[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a0[0U], x[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a1 = wv + d10 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b1 = wv + a * 1U; - wv_a1[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a1[0U], wv_b1[0U]); - wv_a1[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a1[0U], 32U); - Lib_IntVector_Intrinsics_vec256 *wv_a2 = wv + c0 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b2 = wv + d10 * 1U; - wv_a2[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a2[0U], wv_b2[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a3 = wv + b0 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b3 = wv + c0 * 1U; - wv_a3[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a3[0U], wv_b3[0U]); - wv_a3[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a3[0U], 24U); - Lib_IntVector_Intrinsics_vec256 *wv_a4 = wv + a * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b4 = wv + b0 * 1U; - wv_a4[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a4[0U], wv_b4[0U]); - wv_a4[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a4[0U], y[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a5 = wv + d10 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b5 = wv + a * 1U; - wv_a5[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a5[0U], wv_b5[0U]); - wv_a5[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a5[0U], 16U); - Lib_IntVector_Intrinsics_vec256 *wv_a6 = wv + c0 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b6 = wv + d10 * 1U; - wv_a6[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a6[0U], wv_b6[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a7 = wv + b0 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b7 = wv + c0 * 1U; - wv_a7[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a7[0U], wv_b7[0U]); - wv_a7[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a7[0U], 63U); + 
Lib_IntVector_Intrinsics_vec256 *wv_a = wv; + Lib_IntVector_Intrinsics_vec256 *wv_b0 = wv + 1U; + wv_a[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a[0U], wv_b0[0U]); + wv_a[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a[0U], x[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a0 = wv + 3U; + Lib_IntVector_Intrinsics_vec256 *wv_b1 = wv; + wv_a0[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a0[0U], wv_b1[0U]); + wv_a0[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a0[0U], 32U); + Lib_IntVector_Intrinsics_vec256 *wv_a1 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *wv_b2 = wv + 3U; + wv_a1[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a1[0U], wv_b2[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a2 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b3 = wv + 2U; + wv_a2[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a2[0U], wv_b3[0U]); + wv_a2[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a2[0U], 24U); + Lib_IntVector_Intrinsics_vec256 *wv_a3 = wv; + Lib_IntVector_Intrinsics_vec256 *wv_b4 = wv + 1U; + wv_a3[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a3[0U], wv_b4[0U]); + wv_a3[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a3[0U], y[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a4 = wv + 3U; + Lib_IntVector_Intrinsics_vec256 *wv_b5 = wv; + wv_a4[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a4[0U], wv_b5[0U]); + wv_a4[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a4[0U], 16U); + Lib_IntVector_Intrinsics_vec256 *wv_a5 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *wv_b6 = wv + 3U; + wv_a5[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a5[0U], wv_b6[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a6 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b7 = wv + 2U; + wv_a6[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a6[0U], wv_b7[0U]); + wv_a6[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a6[0U], 63U); Lib_IntVector_Intrinsics_vec256 *r10 = wv + 1U; Lib_IntVector_Intrinsics_vec256 *r21 = wv + 2U; Lib_IntVector_Intrinsics_vec256 *r31 = wv + 3U; @@ 
-151,38 +156,34 @@ update_block( Lib_IntVector_Intrinsics_vec256 v11 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v02, 3U); r31[0U] = v11; - uint32_t a0 = 0U; - uint32_t b = 1U; - uint32_t c = 2U; - uint32_t d1 = 3U; - Lib_IntVector_Intrinsics_vec256 *wv_a = wv + a0 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b8 = wv + b * 1U; - wv_a[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a[0U], wv_b8[0U]); - wv_a[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a[0U], z[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a8 = wv + d1 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b9 = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a7 = wv; + Lib_IntVector_Intrinsics_vec256 *wv_b8 = wv + 1U; + wv_a7[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a7[0U], wv_b8[0U]); + wv_a7[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a7[0U], z[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a8 = wv + 3U; + Lib_IntVector_Intrinsics_vec256 *wv_b9 = wv; wv_a8[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a8[0U], wv_b9[0U]); wv_a8[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a8[0U], 32U); - Lib_IntVector_Intrinsics_vec256 *wv_a9 = wv + c * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b10 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a9 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *wv_b10 = wv + 3U; wv_a9[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a9[0U], wv_b10[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a10 = wv + b * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b11 = wv + c * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a10 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b11 = wv + 2U; wv_a10[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a10[0U], wv_b11[0U]); wv_a10[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a10[0U], 24U); - Lib_IntVector_Intrinsics_vec256 *wv_a11 = wv + a0 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b12 = wv + b * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a11 = wv; + Lib_IntVector_Intrinsics_vec256 *wv_b12 = wv + 1U; wv_a11[0U] = 
Lib_IntVector_Intrinsics_vec256_add64(wv_a11[0U], wv_b12[0U]); wv_a11[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a11[0U], w[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a12 = wv + d1 * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b13 = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a12 = wv + 3U; + Lib_IntVector_Intrinsics_vec256 *wv_b13 = wv; wv_a12[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a12[0U], wv_b13[0U]); wv_a12[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a12[0U], 16U); - Lib_IntVector_Intrinsics_vec256 *wv_a13 = wv + c * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b14 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a13 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *wv_b14 = wv + 3U; wv_a13[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a13[0U], wv_b14[0U]); - Lib_IntVector_Intrinsics_vec256 *wv_a14 = wv + b * 1U; - Lib_IntVector_Intrinsics_vec256 *wv_b = wv + c * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_a14 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b = wv + 2U; wv_a14[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a14[0U], wv_b[0U]); wv_a14[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a14[0U], 63U); Lib_IntVector_Intrinsics_vec256 *r11 = wv + 1U; @@ -240,25 +241,27 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); uint8_t kk1 = (uint8_t)kk; uint8_t nn1 = (uint8_t)nn; + uint64_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 4U; uint8_t *bj = p.salt + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____0; os[i] = x;); + uint64_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 6U; uint8_t *bj = p.personal + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____1; os[i] = x;); tmp[0U] = (uint64_t)nn1 @@ -289,75 +292,6 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, 
uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec256 *r0 = hash; - Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - 
uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec256 *wv, @@ -372,11 +306,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -399,7 +333,7 @@ Hacl_Hash_Blake2b_Simd256_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -408,6 +342,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -418,7 +353,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -452,7 +387,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_Simd256_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -493,6 +428,7 @@ Hacl_Hash_Blake2b_Simd256_finish( Lib_IntVector_Intrinsics_vec256 *row1 = hash + 1U; Lib_IntVector_Intrinsics_vec256_store64_le(first, row0[0U]); 
Lib_IntVector_Intrinsics_vec256_store64_le(second, row1[0U]); + KRML_MAYBE_UNUSED_VAR(b); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); @@ -538,11 +474,11 @@ Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32( 0U, 4U, 1U, - uint64_t *os = b0; uint8_t *bj = b8 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = b0; os[i] = x;); uint8_t b80[32U] = { 0U }; Lib_IntVector_Intrinsics_vec256_store64_le(b80, r1[0U]); @@ -550,11 +486,11 @@ Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32( 0U, 4U, 1U, - uint64_t *os = b1; uint8_t *bj = b80 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = b1; os[i] = x;); uint8_t b81[32U] = { 0U }; Lib_IntVector_Intrinsics_vec256_store64_le(b81, r2[0U]); @@ -562,11 +498,11 @@ Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32( 0U, 4U, 1U, - uint64_t *os = b2; uint8_t *bj = b81 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = b2; os[i] = x;); uint8_t b82[32U] = { 0U }; Lib_IntVector_Intrinsics_vec256_store64_le(b82, r3[0U]); @@ -574,11 +510,11 @@ Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32( 0U, 4U, 1U, - uint64_t *os = b3; uint8_t *bj = b82 + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = b3; os[i] = x;); } @@ -593,10 +529,7 @@ Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void) } static Hacl_Hash_Blake2b_Simd256_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -610,7 +543,90 @@ static Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); 
Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { .fst = kk.key_length, .snd = kk.digest_length, .thd = kk.last_node, .f3 = wv, .f4 = b }; + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f4; + uint32_t kk20 = (uint32_t)i0.key_length; + uint8_t *k_ = key.snd; + if (!(kk20 == 0U)) + { + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_, kk20 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + uint64_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + uint64_t *os = uu____0; + os[i] = x;); + uint64_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 8U; + uint64_t u = 
load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + uint64_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -624,60 +640,60 @@ static Hacl_Hash_Blake2b_Simd256_state_t Hacl_Hash_Blake2b_Simd256_state_t s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2b_Simd256_state_t - *p = + *p0 = (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof ( Hacl_Hash_Blake2b_Simd256_state_t )); - p[0U] = s; - Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; - uint8_t kk1 = p1->key_length; - uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i.key_length; - uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) - { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); - } - Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); - return p; + p0[0U] = s; + return p0; } /** - State allocation function when there are parameters and a key. 
The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. 
In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -685,21 +701,16 @@ Hacl_Hash_Blake2b_Simd256_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2b_Simd256_state_t - *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) { @@ -709,39 +720,106 @@ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) { Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2b_Simd256_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { - Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; - uint8_t *buf = scrut.buf; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*state).block_state; + uint8_t *buf = (*state).buf; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; - KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i1.key_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f4; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - 
kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); - uint8_t kk11 = i.key_length; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + uint64_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + uint64_t *os = uu____0; + os[i] = x;); + uint64_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + uint64_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 
= tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); + uint8_t kk11 = i0.key_length; uint32_t ite; if (kk11 != 0U) { @@ -751,15 +829,16 @@ reset_raw( { ite = 0U; } - Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)ite; + state->total_len = total_len; } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -768,15 +847,17 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. 
Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) { @@ -791,11 +872,16 @@ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) { @@ -803,7 +889,7 @@ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -812,8 +898,8 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t chunk_len ) { - Hacl_Hash_Blake2b_Simd256_state_t s = *state; - uint64_t total_len = s.total_len; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -829,10 +915,8 @@ Hacl_Hash_Blake2b_Simd256_update( } if (chunk_len <= 128U - sz) { - Hacl_Hash_Blake2b_Simd256_state_t s1 = *state; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -845,22 +929,12 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Hash_Blake2b_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Hash_Blake2b_Simd256_state_t s1 = *state; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -873,10 +947,8 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { 
uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec256 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3; uint32_t nb = 1U; Hacl_Hash_Blake2b_Simd256_update_multi(128U, wv, @@ -899,9 +971,8 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec256 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, wv, @@ -911,25 +982,15 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2b_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 128U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Hash_Blake2b_Simd256_state_t s1 = *state; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)128U == 0ULL && total_len10 > 0ULL) { @@ -939,22 +1000,12 @@ Hacl_Hash_Blake2b_Simd256_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)128U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t 
total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Hash_Blake2b_Simd256_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Hash_Blake2b_Simd256_state_t s10 = *state; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -967,16 +1018,14 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec256 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3; uint32_t nb = 1U; Hacl_Hash_Blake2b_Simd256_update_multi(128U, wv, hash, FStar_UInt128_uint64_to_uint128(prevlen), - buf, + buf0, nb); } uint32_t ite; @@ -994,9 +1043,8 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec256 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3; uint32_t nb = data1_len / 128U; Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, wv, @@ -1004,35 +1052,35 @@ Hacl_Hash_Blake2b_Simd256_update( FStar_UInt128_uint64_to_uint128(total_len1), data1, nb); - uint8_t *dst = buf; + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2b_Simd256_state_t){ - .block_state = 
block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + uint8_t *buf_ = (*s).buf; + uint64_t total_len = (*s).total_len; uint32_t r; if (total_len % (uint64_t)128U == 0ULL && 
total_len > 0ULL) { @@ -1047,11 +1095,12 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.thd.snd; + { .fst = i1.key_length, .snd = i1.digest_length, .thd = i1.last_node, .f3 = wv0, .f4 = b }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.f4; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.f4; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 128U == 0U && r > 0U) { @@ -1062,11 +1111,8 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 ite = r % 128U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc0 = tmp_block_state.thd; - Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; - Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; + Lib_IntVector_Intrinsics_vec256 *hash0 = tmp_block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv1 = tmp_block_state.f3; uint32_t nb = 0U; Hacl_Hash_Blake2b_Simd256_update_multi(0U, wv1, @@ -1075,18 +1121,34 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = tmp_block_state.thd; - Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec256 *hash = tmp_block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv = tmp_block_state.f3; + bool last_node1 = 
tmp_block_state.thd; Hacl_Hash_Blake2b_Simd256_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn1, dst, tmp_block_state.f4); + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1097,8 +1159,8 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec256 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec256 *b = block_state.f4; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1106,18 +1168,18 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. 
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) { - Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; - Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -1131,9 +1193,10 @@ Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.thd.snd; + block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = i.last_node, .f3 = wv, .f4 = b }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.f4; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.f4; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1175,8 +1238,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); } +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. 
The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, @@ -1203,25 +1272,27 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); uint8_t kk = params.key_length; uint8_t nn = params.digest_length; + uint64_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 4U; uint8_t *bj = params.salt + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____0; os[i] = x;); + uint64_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint64_t *os = tmp + 6U; uint8_t *bj = params.personal + i * 8U; uint64_t u = load64_le(bj); uint64_t r = u; uint64_t x = r; + uint64_t *os = uu____1; os[i] = x;); tmp[0U] = (uint64_t)nn diff --git a/src/Hacl_Hash_Blake2s.c b/src/Hacl_Hash_Blake2s.c index 6e19d83d..60cc5c7c 100644 --- a/src/Hacl_Hash_Blake2s.c +++ b/src/Hacl_Hash_Blake2s.c @@ -30,18 +30,25 @@ #include "lib_memzero0.h" static inline void -update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t *d) +update_block( + uint32_t *wv, + uint32_t *hash, + bool flag, + bool last_node, + uint64_t totlen, + uint8_t *d +) { uint32_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = m_w; uint8_t *bj = d + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = m_w; os[i] = x;); uint32_t mask[4U] = { 0U }; uint32_t wv_14; @@ -53,7 +60,15 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask[0U] = (uint32_t)totlen; mask[1U] = (uint32_t)(totlen >> 32U); mask[2U] = wv_14; @@ -64,8 +79,8 @@ 
update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * 0U, 4U, 1U, - uint32_t *os = wv3; uint32_t x = wv3[i] ^ mask[i]; + uint32_t *os = wv3; os[i] = x;); KRML_MAYBE_FOR10(i0, 0U, @@ -125,131 +140,127 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * uint32_t *y = m_st + 4U; uint32_t *z = m_st + 8U; uint32_t *w = m_st + 12U; - uint32_t a = 0U; - uint32_t b0 = 1U; - uint32_t c0 = 2U; - uint32_t d10 = 3U; - uint32_t *wv_a0 = wv + a * 4U; - uint32_t *wv_b0 = wv + b0 * 4U; + uint32_t *wv_a = wv; + uint32_t *wv_b0 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a0; - uint32_t x1 = wv_a0[i] + wv_b0[i]; + uint32_t x1 = wv_a[i] + wv_b0[i]; + uint32_t *os = wv_a; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a0; - uint32_t x1 = wv_a0[i] + x[i]; + uint32_t x1 = wv_a[i] + x[i]; + uint32_t *os = wv_a; os[i] = x1;); - uint32_t *wv_a1 = wv + d10 * 4U; - uint32_t *wv_b1 = wv + a * 4U; + uint32_t *wv_a0 = wv + 12U; + uint32_t *wv_b1 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a1; - uint32_t x1 = wv_a1[i] ^ wv_b1[i]; + uint32_t x1 = wv_a0[i] ^ wv_b1[i]; + uint32_t *os = wv_a0; os[i] = x1;); - uint32_t *r10 = wv_a1; + uint32_t *r10 = wv_a0; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r10; uint32_t x1 = r10[i]; uint32_t x10 = x1 >> 16U | x1 << 16U; + uint32_t *os = r10; os[i] = x10;); - uint32_t *wv_a2 = wv + c0 * 4U; - uint32_t *wv_b2 = wv + d10 * 4U; + uint32_t *wv_a1 = wv + 8U; + uint32_t *wv_b2 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a2; - uint32_t x1 = wv_a2[i] + wv_b2[i]; + uint32_t x1 = wv_a1[i] + wv_b2[i]; + uint32_t *os = wv_a1; os[i] = x1;); - uint32_t *wv_a3 = wv + b0 * 4U; - uint32_t *wv_b3 = wv + c0 * 4U; + uint32_t *wv_a2 = wv + 4U; + uint32_t *wv_b3 = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a3; - uint32_t x1 = wv_a3[i] ^ wv_b3[i]; + uint32_t x1 = wv_a2[i] ^ wv_b3[i]; + uint32_t *os = wv_a2; os[i] = x1;); - uint32_t 
*r12 = wv_a3; + uint32_t *r12 = wv_a2; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r12; uint32_t x1 = r12[i]; uint32_t x10 = x1 >> 12U | x1 << 20U; + uint32_t *os = r12; os[i] = x10;); - uint32_t *wv_a4 = wv + a * 4U; - uint32_t *wv_b4 = wv + b0 * 4U; + uint32_t *wv_a3 = wv; + uint32_t *wv_b4 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a4; - uint32_t x1 = wv_a4[i] + wv_b4[i]; + uint32_t x1 = wv_a3[i] + wv_b4[i]; + uint32_t *os = wv_a3; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a4; - uint32_t x1 = wv_a4[i] + y[i]; + uint32_t x1 = wv_a3[i] + y[i]; + uint32_t *os = wv_a3; os[i] = x1;); - uint32_t *wv_a5 = wv + d10 * 4U; - uint32_t *wv_b5 = wv + a * 4U; + uint32_t *wv_a4 = wv + 12U; + uint32_t *wv_b5 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a5; - uint32_t x1 = wv_a5[i] ^ wv_b5[i]; + uint32_t x1 = wv_a4[i] ^ wv_b5[i]; + uint32_t *os = wv_a4; os[i] = x1;); - uint32_t *r13 = wv_a5; + uint32_t *r13 = wv_a4; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r13; uint32_t x1 = r13[i]; uint32_t x10 = x1 >> 8U | x1 << 24U; + uint32_t *os = r13; os[i] = x10;); - uint32_t *wv_a6 = wv + c0 * 4U; - uint32_t *wv_b6 = wv + d10 * 4U; + uint32_t *wv_a5 = wv + 8U; + uint32_t *wv_b6 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a6; - uint32_t x1 = wv_a6[i] + wv_b6[i]; + uint32_t x1 = wv_a5[i] + wv_b6[i]; + uint32_t *os = wv_a5; os[i] = x1;); - uint32_t *wv_a7 = wv + b0 * 4U; - uint32_t *wv_b7 = wv + c0 * 4U; + uint32_t *wv_a6 = wv + 4U; + uint32_t *wv_b7 = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a7; - uint32_t x1 = wv_a7[i] ^ wv_b7[i]; + uint32_t x1 = wv_a6[i] ^ wv_b7[i]; + uint32_t *os = wv_a6; os[i] = x1;); - uint32_t *r14 = wv_a7; + uint32_t *r14 = wv_a6; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r14; uint32_t x1 = r14[i]; uint32_t x10 = x1 >> 7U | x1 << 25U; + uint32_t *os = r14; os[i] = x10;); uint32_t *r15 = wv + 4U; uint32_t *r21 = wv + 8U; @@ -281,131 +292,127 @@ 
update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * r112[1U] = x12; r112[2U] = x22; r112[3U] = x32; - uint32_t a0 = 0U; - uint32_t b = 1U; - uint32_t c = 2U; - uint32_t d1 = 3U; - uint32_t *wv_a = wv + a0 * 4U; - uint32_t *wv_b8 = wv + b * 4U; + uint32_t *wv_a7 = wv; + uint32_t *wv_b8 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a; - uint32_t x1 = wv_a[i] + wv_b8[i]; + uint32_t x1 = wv_a7[i] + wv_b8[i]; + uint32_t *os = wv_a7; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a; - uint32_t x1 = wv_a[i] + z[i]; + uint32_t x1 = wv_a7[i] + z[i]; + uint32_t *os = wv_a7; os[i] = x1;); - uint32_t *wv_a8 = wv + d1 * 4U; - uint32_t *wv_b9 = wv + a0 * 4U; + uint32_t *wv_a8 = wv + 12U; + uint32_t *wv_b9 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a8; uint32_t x1 = wv_a8[i] ^ wv_b9[i]; + uint32_t *os = wv_a8; os[i] = x1;); uint32_t *r16 = wv_a8; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r16; uint32_t x1 = r16[i]; uint32_t x13 = x1 >> 16U | x1 << 16U; + uint32_t *os = r16; os[i] = x13;); - uint32_t *wv_a9 = wv + c * 4U; - uint32_t *wv_b10 = wv + d1 * 4U; + uint32_t *wv_a9 = wv + 8U; + uint32_t *wv_b10 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a9; uint32_t x1 = wv_a9[i] + wv_b10[i]; + uint32_t *os = wv_a9; os[i] = x1;); - uint32_t *wv_a10 = wv + b * 4U; - uint32_t *wv_b11 = wv + c * 4U; + uint32_t *wv_a10 = wv + 4U; + uint32_t *wv_b11 = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a10; uint32_t x1 = wv_a10[i] ^ wv_b11[i]; + uint32_t *os = wv_a10; os[i] = x1;); uint32_t *r17 = wv_a10; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r17; uint32_t x1 = r17[i]; uint32_t x13 = x1 >> 12U | x1 << 20U; + uint32_t *os = r17; os[i] = x13;); - uint32_t *wv_a11 = wv + a0 * 4U; - uint32_t *wv_b12 = wv + b * 4U; + uint32_t *wv_a11 = wv; + uint32_t *wv_b12 = wv + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a11; uint32_t x1 = wv_a11[i] + wv_b12[i]; + uint32_t *os = 
wv_a11; os[i] = x1;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a11; uint32_t x1 = wv_a11[i] + w[i]; + uint32_t *os = wv_a11; os[i] = x1;); - uint32_t *wv_a12 = wv + d1 * 4U; - uint32_t *wv_b13 = wv + a0 * 4U; + uint32_t *wv_a12 = wv + 12U; + uint32_t *wv_b13 = wv; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a12; uint32_t x1 = wv_a12[i] ^ wv_b13[i]; + uint32_t *os = wv_a12; os[i] = x1;); uint32_t *r18 = wv_a12; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r18; uint32_t x1 = r18[i]; uint32_t x13 = x1 >> 8U | x1 << 24U; + uint32_t *os = r18; os[i] = x13;); - uint32_t *wv_a13 = wv + c * 4U; - uint32_t *wv_b14 = wv + d1 * 4U; + uint32_t *wv_a13 = wv + 8U; + uint32_t *wv_b14 = wv + 12U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a13; uint32_t x1 = wv_a13[i] + wv_b14[i]; + uint32_t *os = wv_a13; os[i] = x1;); - uint32_t *wv_a14 = wv + b * 4U; - uint32_t *wv_b = wv + c * 4U; + uint32_t *wv_a14 = wv + 4U; + uint32_t *wv_b = wv + 8U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = wv_a14; uint32_t x1 = wv_a14[i] ^ wv_b[i]; + uint32_t *os = wv_a14; os[i] = x1;); uint32_t *r19 = wv_a14; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = r19; uint32_t x1 = r19[i]; uint32_t x13 = x1 >> 7U | x1 << 25U; + uint32_t *os = r19; os[i] = x13;); uint32_t *r113 = wv + 4U; uint32_t *r2 = wv + 8U; @@ -447,29 +454,29 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * 0U, 4U, 1U, - uint32_t *os = s0; uint32_t x = s0[i] ^ r0[i]; + uint32_t *os = s0; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = s0; uint32_t x = s0[i] ^ r2[i]; + uint32_t *os = s0; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = s1; uint32_t x = s1[i] ^ r1[i]; + uint32_t *os = s1; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = s1; uint32_t x = s1[i] ^ r3[i]; + uint32_t *os = s1; os[i] = x;); } @@ -504,25 +511,27 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r3[1U] = iv5; r3[2U] = iv6; r3[3U] = iv7; + 
uint32_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 4U; uint8_t *bj = p.salt + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); + uint32_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 6U; uint8_t *bj = p.personal + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); tmp[0U] = (uint32_t)(uint8_t)nn @@ -558,83 +567,6 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - uint32_t *r0 = hash; - uint32_t *r1 = hash + 4U; - uint32_t *r2 = hash + 8U; - uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ 
(uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { uint64_t lb = (uint64_t)64U; @@ -642,11 +574,11 @@ static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -666,7 +598,7 @@ Hacl_Hash_Blake2s_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -675,6 +607,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -684,7 +617,7 @@ Hacl_Hash_Blake2s_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -712,7 +645,7 @@ update_blocks(uint32_t len, uint32_t *wv, uint32_t *hash, uint64_t prev, uint8_t rem = rem0; } Hacl_Hash_Blake2s_update_multi(len, wv, 
hash, prev, blocks, nb); - Hacl_Hash_Blake2s_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -741,22 +674,112 @@ void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash) uint32_t *row1 = hash + 4U; KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store32_le(first + i * 4U, row0[i]);); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store32_le(second + i * 4U, row1[i]);); + KRML_MAYBE_UNUSED_VAR(b); uint8_t *final = b; memcpy(output, final, nn * sizeof (uint8_t)); Lib_Memzero0_memzero(b, 32U, uint8_t, void *); } static Hacl_Hash_Blake2s_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { .fst = kk.key_length, .snd = kk.digest_length, .thd = kk.last_node, .f3 = wv, .f4 = b }; + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f4; + uint32_t kk2 = (uint32_t)i0.key_length; + uint8_t *k_ = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 
= Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint32_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + uint32_t *os = uu____0; + os[i] = x;); + uint32_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + uint32_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -770,53 +793,56 @@ static Hacl_Hash_Blake2s_state_t Hacl_Hash_Blake2s_state_t s = { .block_state = block_state, .buf = buf, 
.total_len = (uint64_t)ite }; Hacl_Hash_Blake2s_state_t - *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); - p[0U] = s; - Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; - uint8_t kk1 = p1->key_length; - uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i.key_length; - uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) - { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); - } - Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); - return p; + *p0 = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); + p0[0U] = s; + return p0; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. 
+ */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. 
+ */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -824,20 +850,15 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; - Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) { @@ -847,28 +868,29 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) { Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2s_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { - Hacl_Hash_Blake2s_state_t scrut = *state; - uint8_t *buf = scrut.buf; - Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + Hacl_Hash_Blake2s_block_state_t block_state = (*state).block_state; + uint8_t *buf = (*state).buf; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; - KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f4; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -878,8 +900,82 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); - uint8_t kk11 = i.key_length; + uint32_t tmp[8U] = { 0U }; + 
uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint32_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + uint32_t *os = uu____0; + os[i] = x;); + uint32_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + uint32_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + uint8_t kk11 = i0.key_length; 
uint32_t ite; if (kk11 != 0U) { @@ -889,15 +985,16 @@ reset_raw( { ite = 0U; } - Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)ite; + state->total_len = total_len; } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -906,15 +1003,17 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) { @@ -929,11 +1028,16 @@ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) { @@ -941,13 +1045,13 @@ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Hash_Blake2s_state_t s = *state; - uint64_t total_len = s.total_len; + Hacl_Hash_Blake2s_block_state_t block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -963,10 +1067,8 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 } if (chunk_len <= 64U - sz) { - Hacl_Hash_Blake2s_state_t s1 = *state; - Hacl_Hash_Blake2s_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = 
(*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -979,22 +1081,12 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Hash_Blake2s_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Hash_Blake2s_state_t s1 = *state; - Hacl_Hash_Blake2s_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1007,9 +1099,8 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; - uint32_t *wv = acc.fst; - uint32_t *hash = acc.snd; + uint32_t *hash = block_state.f4; + uint32_t *wv = block_state.f3; uint32_t nb = 1U; Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -1027,32 +1118,21 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; - uint32_t *wv = acc.fst; - uint32_t *hash = acc.snd; + uint32_t *hash = block_state.f4; + uint32_t *wv = block_state.f3; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2s_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = 
total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Hash_Blake2s_state_t s1 = *state; - Hacl_Hash_Blake2s_block_state_t block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -1062,22 +1142,12 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Hash_Blake2s_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Hash_Blake2s_state_t s10 = *state; - Hacl_Hash_Blake2s_block_state_t block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1090,11 +1160,10 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; - uint32_t *wv = acc.fst; - uint32_t *hash = acc.snd; + uint32_t *hash = block_state.f4; + uint32_t *wv = block_state.f3; uint32_t nb = 1U; - Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); + Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf0, nb); } uint32_t ite; if @@ -1111,39 +1180,39 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - 
K____uint32_t___uint32_t_ acc = block_state1.thd; - uint32_t *wv = acc.fst; - uint32_t *hash = acc.snd; + uint32_t *hash = block_state.f4; + uint32_t *wv = block_state.f3; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); - uint8_t *dst = buf; + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2s_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
*/ -void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_state_t scrut = *state; - Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + Hacl_Hash_Blake2s_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + uint8_t *buf_ = (*s).buf; + uint64_t total_len = (*s).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -1158,11 +1227,12 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) uint32_t b[16U] = { 0U }; Hacl_Hash_Blake2s_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint32_t *src_b = block_state.thd.snd; - uint32_t *dst_b = tmp_block_state.thd.snd; + { .fst = i1.key_length, .snd = i1.digest_length, .thd = i1.last_node, .f3 = wv0, .f4 = b }; + uint32_t *src_b = block_state.f4; + uint32_t *dst_b = tmp_block_state.f4; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -1173,19 +1243,33 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; - K____uint32_t___uint32_t_ acc0 = tmp_block_state.thd; - uint32_t *wv1 = acc0.fst; 
- uint32_t *hash0 = acc0.snd; + uint32_t *hash0 = tmp_block_state.f4; + uint32_t *wv1 = tmp_block_state.f3; uint32_t nb = 0U; Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint32_t___uint32_t_ acc = tmp_block_state.thd; - uint32_t *wv = acc.fst; - uint32_t *hash = acc.snd; - Hacl_Hash_Blake2s_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint32_t *hash = tmp_block_state.f4; + uint32_t *wv = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; + Hacl_Hash_Blake2s_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn1, dst, tmp_block_state.f4); + Hacl_Hash_Blake2s_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1196,8 +1280,8 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint32_t *b = block_state.thd.snd; - uint32_t *wv = block_state.thd.fst; + uint32_t *b = block_state.f4; + uint32_t *wv = block_state.f3; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1205,25 +1289,26 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t 
*state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) { - Hacl_Hash_Blake2s_state_t scrut = *state; - Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint32_t *src_b = block_state0.thd.snd; - uint32_t *dst_b = block_state.thd.snd; + block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = i.last_node, .f3 = wv, .f4 = b }; + uint32_t *src_b = block_state0.f4; + uint32_t *dst_b = block_state.f4; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); Hacl_Hash_Blake2s_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1262,8 +1347,14 @@ Hacl_Hash_Blake2s_hash_with_key( Lib_Memzero0_memzero(b, 16U, uint32_t, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, @@ -1294,25 +1385,27 @@ Hacl_Hash_Blake2s_hash_with_key_and_paramas( r3[1U] = iv5; r3[2U] = iv6; r3[3U] = iv7; + uint32_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 4U; uint8_t *bj = params.salt + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); + uint32_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 6U; uint8_t *bj = params.personal + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); tmp[0U] = (uint32_t)params.digest_length diff --git a/src/Hacl_Hash_Blake2s_Simd128.c b/src/Hacl_Hash_Blake2s_Simd128.c index c02da8fa..57fd9b25 100644 --- a/src/Hacl_Hash_Blake2s_Simd128.c +++ b/src/Hacl_Hash_Blake2s_Simd128.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, bool flag, + bool last_node, uint64_t totlen, uint8_t *d ) @@ -43,11 +44,11 @@ update_block( 0U, 16U, 1U, - uint32_t *os = m_w; uint8_t *bj = d + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = m_w; os[i] = x;); Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_zero; uint32_t wv_14; @@ -59,7 +60,15 @@ update_block( { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask = Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)totlen, (uint32_t)(totlen >> 32U), @@ -102,40 +111,36 @@ update_block( Lib_IntVector_Intrinsics_vec128 *y = m_st + 1U; Lib_IntVector_Intrinsics_vec128 *z = m_st + 2U; Lib_IntVector_Intrinsics_vec128 *w = m_st + 3U; - uint32_t a = 0U; - uint32_t b0 = 1U; - uint32_t c0 = 2U; - uint32_t d10 = 3U; - Lib_IntVector_Intrinsics_vec128 *wv_a0 = wv + a * 1U; - 
Lib_IntVector_Intrinsics_vec128 *wv_b0 = wv + b0 * 1U; - wv_a0[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a0[0U], wv_b0[0U]); - wv_a0[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a0[0U], x[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a1 = wv + d10 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b1 = wv + a * 1U; - wv_a1[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a1[0U], wv_b1[0U]); - wv_a1[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a1[0U], 16U); - Lib_IntVector_Intrinsics_vec128 *wv_a2 = wv + c0 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b2 = wv + d10 * 1U; - wv_a2[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a2[0U], wv_b2[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a3 = wv + b0 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b3 = wv + c0 * 1U; - wv_a3[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a3[0U], wv_b3[0U]); - wv_a3[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a3[0U], 12U); - Lib_IntVector_Intrinsics_vec128 *wv_a4 = wv + a * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b4 = wv + b0 * 1U; - wv_a4[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a4[0U], wv_b4[0U]); - wv_a4[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a4[0U], y[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a5 = wv + d10 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b5 = wv + a * 1U; - wv_a5[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a5[0U], wv_b5[0U]); - wv_a5[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a5[0U], 8U); - Lib_IntVector_Intrinsics_vec128 *wv_a6 = wv + c0 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b6 = wv + d10 * 1U; - wv_a6[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a6[0U], wv_b6[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a7 = wv + b0 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b7 = wv + c0 * 1U; - wv_a7[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a7[0U], wv_b7[0U]); - wv_a7[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a7[0U], 7U); + Lib_IntVector_Intrinsics_vec128 *wv_a = wv; + Lib_IntVector_Intrinsics_vec128 *wv_b0 = wv + 1U; + 
wv_a[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a[0U], wv_b0[0U]); + wv_a[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a[0U], x[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a0 = wv + 3U; + Lib_IntVector_Intrinsics_vec128 *wv_b1 = wv; + wv_a0[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a0[0U], wv_b1[0U]); + wv_a0[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a0[0U], 16U); + Lib_IntVector_Intrinsics_vec128 *wv_a1 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *wv_b2 = wv + 3U; + wv_a1[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a1[0U], wv_b2[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a2 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b3 = wv + 2U; + wv_a2[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a2[0U], wv_b3[0U]); + wv_a2[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a2[0U], 12U); + Lib_IntVector_Intrinsics_vec128 *wv_a3 = wv; + Lib_IntVector_Intrinsics_vec128 *wv_b4 = wv + 1U; + wv_a3[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a3[0U], wv_b4[0U]); + wv_a3[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a3[0U], y[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a4 = wv + 3U; + Lib_IntVector_Intrinsics_vec128 *wv_b5 = wv; + wv_a4[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a4[0U], wv_b5[0U]); + wv_a4[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a4[0U], 8U); + Lib_IntVector_Intrinsics_vec128 *wv_a5 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *wv_b6 = wv + 3U; + wv_a5[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a5[0U], wv_b6[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a6 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b7 = wv + 2U; + wv_a6[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a6[0U], wv_b7[0U]); + wv_a6[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a6[0U], 7U); Lib_IntVector_Intrinsics_vec128 *r10 = wv + 1U; Lib_IntVector_Intrinsics_vec128 *r21 = wv + 2U; Lib_IntVector_Intrinsics_vec128 *r31 = wv + 3U; @@ -151,38 +156,34 @@ update_block( Lib_IntVector_Intrinsics_vec128 v11 = 
Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v02, 3U); r31[0U] = v11; - uint32_t a0 = 0U; - uint32_t b = 1U; - uint32_t c = 2U; - uint32_t d1 = 3U; - Lib_IntVector_Intrinsics_vec128 *wv_a = wv + a0 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b8 = wv + b * 1U; - wv_a[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a[0U], wv_b8[0U]); - wv_a[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a[0U], z[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a8 = wv + d1 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b9 = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a7 = wv; + Lib_IntVector_Intrinsics_vec128 *wv_b8 = wv + 1U; + wv_a7[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a7[0U], wv_b8[0U]); + wv_a7[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a7[0U], z[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a8 = wv + 3U; + Lib_IntVector_Intrinsics_vec128 *wv_b9 = wv; wv_a8[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a8[0U], wv_b9[0U]); wv_a8[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a8[0U], 16U); - Lib_IntVector_Intrinsics_vec128 *wv_a9 = wv + c * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b10 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a9 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *wv_b10 = wv + 3U; wv_a9[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a9[0U], wv_b10[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a10 = wv + b * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b11 = wv + c * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a10 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b11 = wv + 2U; wv_a10[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a10[0U], wv_b11[0U]); wv_a10[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a10[0U], 12U); - Lib_IntVector_Intrinsics_vec128 *wv_a11 = wv + a0 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b12 = wv + b * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a11 = wv; + Lib_IntVector_Intrinsics_vec128 *wv_b12 = wv + 1U; wv_a11[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a11[0U], wv_b12[0U]); wv_a11[0U] = 
Lib_IntVector_Intrinsics_vec128_add32(wv_a11[0U], w[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a12 = wv + d1 * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b13 = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a12 = wv + 3U; + Lib_IntVector_Intrinsics_vec128 *wv_b13 = wv; wv_a12[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a12[0U], wv_b13[0U]); wv_a12[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a12[0U], 8U); - Lib_IntVector_Intrinsics_vec128 *wv_a13 = wv + c * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b14 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a13 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *wv_b14 = wv + 3U; wv_a13[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a13[0U], wv_b14[0U]); - Lib_IntVector_Intrinsics_vec128 *wv_a14 = wv + b * 1U; - Lib_IntVector_Intrinsics_vec128 *wv_b = wv + c * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_a14 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b = wv + 2U; wv_a14[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a14[0U], wv_b[0U]); wv_a14[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a14[0U], 7U); Lib_IntVector_Intrinsics_vec128 *r11 = wv + 1U; @@ -238,25 +239,27 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + uint32_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 4U; uint8_t *bj = p.salt + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); + uint32_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 6U; uint8_t *bj = p.personal + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); tmp[0U] = (uint32_t)(uint8_t)nn @@ -286,72 +289,6 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, 
uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec128 *r0 = hash; - Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = 
iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec128 *wv, @@ -366,11 +303,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -390,7 +327,7 @@ Hacl_Hash_Blake2s_Simd128_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -399,6 +336,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -408,7 +346,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -442,7 +380,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2s_Simd128_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -483,6 +421,7 @@ Hacl_Hash_Blake2s_Simd128_finish( Lib_IntVector_Intrinsics_vec128 *row1 = hash + 1U; Lib_IntVector_Intrinsics_vec128_store32_le(first, row0[0U]); Lib_IntVector_Intrinsics_vec128_store32_le(second, row1[0U]); + KRML_MAYBE_UNUSED_VAR(b); uint8_t *final = b; memcpy(output, 
final, nn * sizeof (uint8_t)); Lib_Memzero0_memzero(b, 32U, uint8_t, void *); @@ -508,11 +447,11 @@ Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32( 0U, 4U, 1U, - uint32_t *os = b0; uint8_t *bj = b8 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = b0; os[i] = x;); uint8_t b80[16U] = { 0U }; Lib_IntVector_Intrinsics_vec128_store32_le(b80, r1[0U]); @@ -520,11 +459,11 @@ Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32( 0U, 4U, 1U, - uint32_t *os = b1; uint8_t *bj = b80 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = b1; os[i] = x;); uint8_t b81[16U] = { 0U }; Lib_IntVector_Intrinsics_vec128_store32_le(b81, r2[0U]); @@ -532,11 +471,11 @@ Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32( 0U, 4U, 1U, - uint32_t *os = b2; uint8_t *bj = b81 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = b2; os[i] = x;); uint8_t b82[16U] = { 0U }; Lib_IntVector_Intrinsics_vec128_store32_le(b82, r3[0U]); @@ -544,11 +483,11 @@ Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32( 0U, 4U, 1U, - uint32_t *os = b3; uint8_t *bj = b82 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = b3; os[i] = x;); } @@ -583,10 +522,7 @@ Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) } static Hacl_Hash_Blake2s_Simd128_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -600,7 +536,87 @@ static Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + 
block_state = + { .fst = kk.key_length, .snd = kk.digest_length, .thd = kk.last_node, .f3 = wv, .f4 = b }; + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f4; + uint32_t kk2 = (uint32_t)i0.key_length; + uint8_t *k_ = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + uint32_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + uint32_t *os = uu____0; + os[i] = x;); + uint32_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + uint32_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 
24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -614,60 +630,60 @@ static Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2s_Simd128_state_t s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; Hacl_Hash_Blake2s_Simd128_state_t - *p = + *p0 = (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof ( Hacl_Hash_Blake2s_Simd128_state_t )); - p[0U] = s; - Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; - uint8_t kk1 = p1->key_length; - uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i.key_length; - uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) - { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); - } - Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); - return p; + p0[0U] = s; + return p0; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. 
Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. 
+ +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -675,21 +691,16 @@ Hacl_Hash_Blake2s_Simd128_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2s_Simd128_state_t - *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) { @@ -699,28 +710,30 @@ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) { Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2s_Simd128_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { - Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; - uint8_t *buf = scrut.buf; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*state).block_state; + uint8_t *buf = (*state).buf; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; - KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_index + i0 = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f4; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -730,8 +743,70 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - 
init_with_params(block_state.thd.snd, pv); - uint8_t kk11 = i.key_length; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + uint32_t *uu____0 = tmp + 4U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + uint32_t *os = uu____0; + os[i] = x;); + uint32_t *uu____1 = tmp + 6U; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint8_t *bj = pv.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + uint32_t *os = uu____1; + os[i] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = 
iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); + uint8_t kk11 = i0.key_length; uint32_t ite; if (kk11 != 0U) { @@ -741,15 +816,16 @@ reset_raw( { ite = 0U; } - Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)ite; + state->total_len = total_len; } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -758,15 +834,17 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. 
The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k) { @@ -781,11 +859,16 @@ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) { @@ -793,7 +876,7 @@ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -802,8 +885,8 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t chunk_len ) { - Hacl_Hash_Blake2s_Simd128_state_t s = *state; - uint64_t total_len = s.total_len; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -819,10 +902,8 @@ Hacl_Hash_Blake2s_Simd128_update( } if (chunk_len <= 64U - sz) { - Hacl_Hash_Blake2s_Simd128_state_t s1 = *state; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -835,22 +916,12 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Hash_Blake2s_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Hash_Blake2s_Simd128_state_t s1 = *state; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -863,10 +934,8 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { 
uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec128 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3; uint32_t nb = 1U; Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb); } @@ -884,32 +953,21 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec128 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2s_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Hash_Blake2s_Simd128_state_t s1 = *state; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -919,22 +977,12 @@ Hacl_Hash_Blake2s_Simd128_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = 
total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Hash_Blake2s_Simd128_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Hash_Blake2s_Simd128_state_t s10 = *state; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -947,12 +995,10 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec128 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3; uint32_t nb = 1U; - Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb); + Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf0, nb); } uint32_t ite; if @@ -969,40 +1015,39 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; - Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + Lib_IntVector_Intrinsics_vec128 *hash = block_state.f4; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3; uint32_t nb = data1_len / 64U; Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); - uint8_t *dst = buf; + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_Blake2s_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + 
(uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + uint8_t *buf_ = (*s).buf; + uint64_t total_len = (*s).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -1017,11 +1062,12 @@ 
Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Hash_Blake2s_Simd128_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.thd.snd; + { .fst = i1.key_length, .snd = i1.digest_length, .thd = i1.last_node, .f3 = wv0, .f4 = b }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.f4; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.f4; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); uint64_t prev_len = total_len - (uint64_t)r; + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -1032,21 +1078,33 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc0 = tmp_block_state.thd; - Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; - Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; + Lib_IntVector_Intrinsics_vec128 *hash0 = tmp_block_state.f4; + Lib_IntVector_Intrinsics_vec128 *wv1 = tmp_block_state.f3; uint32_t nb = 0U; Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = tmp_block_state.thd; - Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; - Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; - Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Lib_IntVector_Intrinsics_vec128 *hash = tmp_block_state.f4; + 
Lib_IntVector_Intrinsics_vec128 *wv = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; + Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn1, dst, tmp_block_state.f4); + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1057,8 +1115,8 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec128 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec128 *b = block_state.f4; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1066,18 +1124,18 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. 
*/ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state) { - Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; - Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -1091,9 +1149,10 @@ Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.thd.snd; + block_state = + { .fst = i.key_length, .snd = i.digest_length, .thd = i.last_node, .f3 = wv, .f4 = b }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.f4; + Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.f4; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1135,8 +1194,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. 
The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, @@ -1161,25 +1226,27 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + uint32_t *uu____0 = tmp + 4U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 4U; uint8_t *bj = params.salt + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____0; os[i] = x;); + uint32_t *uu____1 = tmp + 6U; KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = tmp + 6U; uint8_t *bj = params.personal + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = uu____1; os[i] = x;); tmp[0U] = (uint32_t)params.digest_length diff --git a/src/Hacl_Hash_MD5.c b/src/Hacl_Hash_MD5.c index ed294839..55c755f2 100644 --- a/src/Hacl_Hash_MD5.c +++ b/src/Hacl_Hash_MD5.c @@ -1167,24 +1167,21 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_MD5_malloc(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(4U, sizeof (uint32_t)); + Hacl_Hash_MD5_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_MD5_init(block_state); return p; } void Hacl_Hash_MD5_reset(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_MD5_init(block_state); - 
Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } /** @@ -1193,8 +1190,8 @@ void Hacl_Hash_MD5_reset(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_Types_error_code Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_32 s = *state; - uint64_t total_len = s.total_len; + uint32_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 2305843009213693951ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -1210,10 +1207,8 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t } if (chunk_len <= 64U - sz) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1226,22 +1221,12 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1253,7 +1238,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t } if (!(sz1 == 0U)) { - 
Hacl_Hash_MD5_update_multi(block_state1, buf, 1U); + Hacl_Hash_MD5_update_multi(block_state, buf, 1U); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1269,28 +1254,18 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); + Hacl_Hash_MD5_update_multi(block_state, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -1300,22 +1275,12 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_32 s10 = *state; - uint32_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 
0ULL) { @@ -1327,7 +1292,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t } if (!(sz1 == 0U)) { - Hacl_Hash_MD5_update_multi(block_state1, buf, 1U); + Hacl_Hash_MD5_update_multi(block_state, buf0, 1U); } uint32_t ite; if @@ -1344,28 +1309,19 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); - uint8_t *dst = buf; + Hacl_Hash_MD5_update_multi(block_state, data1, data1_len / 64U); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } void Hacl_Hash_MD5_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -1378,6 +1334,7 @@ void Hacl_Hash_MD5_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) uint8_t *buf_1 = buf_; uint32_t tmp_block_state[4U] = { 0U }; memcpy(tmp_block_state, block_state, 4U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -1388,7 +1345,6 @@ void Hacl_Hash_MD5_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_MD5_update_multi(tmp_block_state, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; 
Hacl_Hash_MD5_update_last(tmp_block_state, prev_len_last, buf_last, r); @@ -1407,10 +1363,9 @@ void Hacl_Hash_MD5_free(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_MD_state_32 *Hacl_Hash_MD5_copy(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint32_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(4U, sizeof (uint32_t)); diff --git a/src/Hacl_Hash_SHA1.c b/src/Hacl_Hash_SHA1.c index 1a8b09b1..ec9ce312 100644 --- a/src/Hacl_Hash_SHA1.c +++ b/src/Hacl_Hash_SHA1.c @@ -200,24 +200,21 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA1_malloc(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(5U, sizeof (uint32_t)); + Hacl_Hash_SHA1_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_SHA1_init(block_state); return p; } void Hacl_Hash_SHA1_reset(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_SHA1_init(block_state); - Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } /** @@ -226,8 +223,8 @@ void Hacl_Hash_SHA1_reset(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_Types_error_code 
Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_32 s = *state; - uint64_t total_len = s.total_len; + uint32_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 2305843009213693951ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -243,10 +240,8 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ } if (chunk_len <= 64U - sz) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -259,22 +254,12 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -286,7 +271,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ } if (!(sz1 == 0U)) { - Hacl_Hash_SHA1_update_multi(block_state1, buf, 1U); + Hacl_Hash_SHA1_update_multi(block_state, buf, 1U); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -302,28 +287,18 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint32_t data2_len = chunk_len - 
data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); + Hacl_Hash_SHA1_update_multi(block_state, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -333,22 +308,12 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_32 s10 = *state; - uint32_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -360,7 +325,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ } if (!(sz1 == 0U)) { - Hacl_Hash_SHA1_update_multi(block_state1, buf, 1U); + Hacl_Hash_SHA1_update_multi(block_state, buf0, 1U); } uint32_t ite; if @@ -377,28 +342,19 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, 
uint8_t *chunk, uint32_ uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); - uint8_t *dst = buf; + Hacl_Hash_SHA1_update_multi(block_state, data1, data1_len / 64U); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } void Hacl_Hash_SHA1_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -411,6 +367,7 @@ void Hacl_Hash_SHA1_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) uint8_t *buf_1 = buf_; uint32_t tmp_block_state[5U] = { 0U }; memcpy(tmp_block_state, block_state, 5U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -421,7 +378,6 @@ void Hacl_Hash_SHA1_digest(Hacl_Streaming_MD_state_32 *state, uint8_t *output) ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA1_update_multi(tmp_block_state, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA1_update_last(tmp_block_state, prev_len_last, buf_last, r); @@ -440,10 +396,9 @@ void Hacl_Hash_SHA1_free(Hacl_Streaming_MD_state_32 *state) Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA1_copy(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state0 = scrut.block_state; - 
uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint32_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(5U, sizeof (uint32_t)); diff --git a/src/Hacl_Hash_SHA2.c b/src/Hacl_Hash_SHA2.c index 995fe707..7f2e606d 100644 --- a/src/Hacl_Hash_SHA2.c +++ b/src/Hacl_Hash_SHA2.c @@ -33,8 +33,8 @@ void Hacl_Hash_SHA2_sha256_init(uint32_t *hash) 0U, 8U, 1U, - uint32_t *os = hash; uint32_t x = Hacl_Hash_SHA2_h256[i]; + uint32_t *os = hash; os[i] = x;); } @@ -140,8 +140,8 @@ static inline void sha256_update(uint8_t *b, uint32_t *hash) 0U, 8U, 1U, - uint32_t *os = hash; uint32_t x = hash[i] + hash_old[i]; + uint32_t *os = hash; os[i] = x;); } @@ -206,12 +206,12 @@ void Hacl_Hash_SHA2_sha224_init(uint32_t *hash) 0U, 8U, 1U, - uint32_t *os = hash; uint32_t x = Hacl_Hash_SHA2_h224[i]; + uint32_t *os = hash; os[i] = x;); } -static inline void sha224_update_nblocks(uint32_t len, uint8_t *b, uint32_t *st) +void Hacl_Hash_SHA2_sha224_update_nblocks(uint32_t len, uint8_t *b, uint32_t *st) { Hacl_Hash_SHA2_sha256_update_nblocks(len, b, st); } @@ -234,8 +234,8 @@ void Hacl_Hash_SHA2_sha512_init(uint64_t *hash) 0U, 8U, 1U, - uint64_t *os = hash; uint64_t x = Hacl_Hash_SHA2_h512[i]; + uint64_t *os = hash; os[i] = x;); } @@ -341,8 +341,8 @@ static inline void sha512_update(uint8_t *b, uint64_t *hash) 0U, 8U, 1U, - uint64_t *os = hash; uint64_t x = hash[i] + hash_old[i]; + uint64_t *os = hash; os[i] = x;); } @@ -412,8 +412,8 @@ void Hacl_Hash_SHA2_sha384_init(uint64_t *hash) 0U, 8U, 1U, - uint64_t *os = hash; uint64_t x = Hacl_Hash_SHA2_h384[i]; + uint64_t *os = hash; os[i] = x;); } @@ -448,12 +448,12 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA2_malloc_256(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t 
*block_state = (uint32_t *)KRML_HOST_CALLOC(8U, sizeof (uint32_t)); + Hacl_Hash_SHA2_sha256_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_SHA2_sha256_init(block_state); return p; } @@ -465,10 +465,9 @@ more (different) data into the hash in each branch. */ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA2_copy_256(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint32_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(8U, sizeof (uint32_t)); @@ -486,20 +485,17 @@ Reset an existing state to the initial hash state with empty data. 
*/ void Hacl_Hash_SHA2_reset_256(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha256_init(block_state); - Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } static inline Hacl_Streaming_Types_error_code update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_32 s = *state; - uint64_t total_len = s.total_len; + uint32_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 2305843009213693951ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -515,10 +511,8 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk } if (chunk_len <= 64U - sz) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -531,22 +525,12 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 
% (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -558,7 +542,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf, block_state1); + Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf, block_state); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -574,28 +558,18 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); + Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_32 s1 = *state; - uint32_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -605,22 +579,12 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_32 s10 = *state; - uint32_t *block_state1 = s10.block_state; - uint8_t *buf = 
s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -632,7 +596,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf, block_state1); + Hacl_Hash_SHA2_sha256_update_nblocks(64U, buf0, block_state); } uint32_t ite; if @@ -649,18 +613,10 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); - uint8_t *dst = buf; + Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_32){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } @@ -690,10 +646,9 @@ the state and therefore does not invalidate the client-held state `p`.) 
*/ void Hacl_Hash_SHA2_digest_256(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -706,6 +661,7 @@ void Hacl_Hash_SHA2_digest_256(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint32_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -716,7 +672,6 @@ void Hacl_Hash_SHA2_digest_256(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA2_sha256_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha256_update_last(prev_len_last + (uint64_t)r, r, buf_last, tmp_block_state); @@ -761,24 +716,21 @@ Hacl_Streaming_MD_state_32 *Hacl_Hash_SHA2_malloc_224(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *block_state = (uint32_t *)KRML_HOST_CALLOC(8U, sizeof (uint32_t)); + Hacl_Hash_SHA2_sha224_init(block_state); Hacl_Streaming_MD_state_32 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_32 *p = (Hacl_Streaming_MD_state_32 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_32)); p[0U] = s; - Hacl_Hash_SHA2_sha224_init(block_state); return p; } void Hacl_Hash_SHA2_reset_224(Hacl_Streaming_MD_state_32 *state) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint8_t *buf = scrut.buf; - uint32_t *block_state = scrut.block_state; + uint32_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha224_init(block_state); - 
Hacl_Streaming_MD_state_32 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } Hacl_Streaming_Types_error_code @@ -798,10 +750,9 @@ the hash via `update_224`. */ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *output) { - Hacl_Streaming_MD_state_32 scrut = *state; - uint32_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint32_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -814,6 +765,7 @@ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint32_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint32_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 64U == 0U && r > 0U) { @@ -824,8 +776,7 @@ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu ite = r % 64U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; - sha224_update_nblocks(0U, buf_multi, tmp_block_state); + Hacl_Hash_SHA2_sha224_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha224_update_last(prev_len_last + (uint64_t)r, r, buf_last, tmp_block_state); Hacl_Hash_SHA2_sha224_finish(tmp_block_state, output); @@ -847,7 +798,7 @@ void Hacl_Hash_SHA2_hash_224(uint8_t *output, uint8_t *input, uint32_t input_len Hacl_Hash_SHA2_sha224_init(st); uint32_t rem = input_len % 64U; uint64_t len_ = (uint64_t)input_len; - sha224_update_nblocks(input_len, ib, st); + Hacl_Hash_SHA2_sha224_update_nblocks(input_len, ib, st); uint32_t rem1 = input_len % 64U; uint8_t *b0 = ib; uint8_t *lb = b0 + input_len - rem1; @@ -859,12 +810,12 @@ Hacl_Streaming_MD_state_64 
*Hacl_Hash_SHA2_malloc_512(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *block_state = (uint64_t *)KRML_HOST_CALLOC(8U, sizeof (uint64_t)); + Hacl_Hash_SHA2_sha512_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 *p = (Hacl_Streaming_MD_state_64 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_64)); p[0U] = s; - Hacl_Hash_SHA2_sha512_init(block_state); return p; } @@ -876,10 +827,9 @@ more (different) data into the hash in each branch. */ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_copy_512(Hacl_Streaming_MD_state_64 *state) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint64_t *block_state0 = scrut.block_state; - uint8_t *buf0 = scrut.buf; - uint64_t total_len0 = scrut.total_len; + uint64_t *block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); uint64_t *block_state = (uint64_t *)KRML_HOST_CALLOC(8U, sizeof (uint64_t)); @@ -894,20 +844,17 @@ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_copy_512(Hacl_Streaming_MD_state_64 * void Hacl_Hash_SHA2_reset_512(Hacl_Streaming_MD_state_64 *state) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint8_t *buf = scrut.buf; - uint64_t *block_state = scrut.block_state; + uint64_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha512_init(block_state); - Hacl_Streaming_MD_state_64 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } static inline Hacl_Streaming_Types_error_code update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Streaming_MD_state_64 s = *state; - uint64_t total_len = s.total_len; + uint64_t *block_state = (*state).block_state; + uint64_t 
total_len = (*state).total_len; if ((uint64_t)chunk_len > 18446744073709551615ULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -923,10 +870,8 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk } if (chunk_len <= 128U - sz) { - Hacl_Streaming_MD_state_64 s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -939,22 +884,12 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2 - } - ); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Streaming_MD_state_64 s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -966,7 +901,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf, block_state1); + Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf, block_state); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)128U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -982,28 +917,18 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); + Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 
128U * 128U, data1, block_state); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = 128U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Streaming_MD_state_64 s1 = *state; - uint64_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)128U == 0ULL && total_len10 > 0ULL) { @@ -1013,22 +938,12 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk { sz10 = (uint32_t)(total_len10 % (uint64_t)128U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Streaming_MD_state_64 s10 = *state; - uint64_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) { @@ -1040,7 +955,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk } if (!(sz1 == 0U)) { - Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf, block_state1); + Hacl_Hash_SHA2_sha512_update_nblocks(128U, buf0, block_state); } uint32_t ite; if @@ -1057,18 +972,10 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - 
Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); - uint8_t *dst = buf; + Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Streaming_MD_state_64){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } @@ -1098,10 +1005,9 @@ the state and therefore does not invalidate the client-held state `p`.) */ void Hacl_Hash_SHA2_digest_512(Hacl_Streaming_MD_state_64 *state, uint8_t *output) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint64_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint64_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) { @@ -1114,6 +1020,7 @@ void Hacl_Hash_SHA2_digest_512(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint64_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 128U == 0U && r > 0U) { @@ -1124,7 +1031,6 @@ void Hacl_Hash_SHA2_digest_512(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu ite = r % 128U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA2_sha512_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha512_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(prev_len_last), @@ -1173,24 +1079,21 @@ Hacl_Streaming_MD_state_64 *Hacl_Hash_SHA2_malloc_384(void) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *block_state = (uint64_t *)KRML_HOST_CALLOC(8U, sizeof 
(uint64_t)); + Hacl_Hash_SHA2_sha384_init(block_state); Hacl_Streaming_MD_state_64 s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; Hacl_Streaming_MD_state_64 *p = (Hacl_Streaming_MD_state_64 *)KRML_HOST_MALLOC(sizeof (Hacl_Streaming_MD_state_64)); p[0U] = s; - Hacl_Hash_SHA2_sha384_init(block_state); return p; } void Hacl_Hash_SHA2_reset_384(Hacl_Streaming_MD_state_64 *state) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint8_t *buf = scrut.buf; - uint64_t *block_state = scrut.block_state; + uint64_t *block_state = (*state).block_state; Hacl_Hash_SHA2_sha384_init(block_state); - Hacl_Streaming_MD_state_64 - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } Hacl_Streaming_Types_error_code @@ -1210,10 +1113,9 @@ the hash via `update_384`. */ void Hacl_Hash_SHA2_digest_384(Hacl_Streaming_MD_state_64 *state, uint8_t *output) { - Hacl_Streaming_MD_state_64 scrut = *state; - uint64_t *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; + uint64_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) { @@ -1226,6 +1128,7 @@ void Hacl_Hash_SHA2_digest_384(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu uint8_t *buf_1 = buf_; uint64_t tmp_block_state[8U] = { 0U }; memcpy(tmp_block_state, block_state, 8U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 128U == 0U && r > 0U) { @@ -1236,7 +1139,6 @@ void Hacl_Hash_SHA2_digest_384(Hacl_Streaming_MD_state_64 *state, uint8_t *outpu ite = r % 128U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Hacl_Hash_SHA2_sha384_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; 
Hacl_Hash_SHA2_sha384_update_last(FStar_UInt128_add(FStar_UInt128_uint64_to_uint128(prev_len_last), diff --git a/src/Hacl_Hash_SHA3.c b/src/Hacl_Hash_SHA3.c index 89bb0491..8d35d4fe 100644 --- a/src/Hacl_Hash_SHA3.c +++ b/src/Hacl_Hash_SHA3.c @@ -251,7 +251,8 @@ Hacl_Hash_SHA3_update_multi_sha3( uint8_t *bl0 = b_; uint8_t *uu____0 = b0 + i * block_len(a); memcpy(bl0, uu____0, block_len(a) * sizeof (uint8_t)); - block_len(a); + uint32_t unused = block_len(a); + KRML_MAYBE_UNUSED_VAR(unused); absorb_inner_32(b_, s); } } @@ -544,13 +545,6 @@ Hacl_Hash_SHA3_update_last_sha3( absorb_inner_32(b3, s); } -typedef struct hash_buf2_s -{ - Hacl_Hash_SHA3_hash_buf fst; - Hacl_Hash_SHA3_hash_buf snd; -} -hash_buf2; - Spec_Hash_Definitions_hash_alg Hacl_Hash_SHA3_get_alg(Hacl_Hash_SHA3_state_t *s) { Hacl_Hash_SHA3_hash_buf block_state = (*s).block_state; @@ -563,13 +557,13 @@ Hacl_Hash_SHA3_state_t *Hacl_Hash_SHA3_malloc(Spec_Hash_Definitions_hash_alg a) uint8_t *buf0 = (uint8_t *)KRML_HOST_CALLOC(block_len(a), sizeof (uint8_t)); uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); Hacl_Hash_SHA3_hash_buf block_state = { .fst = a, .snd = buf }; + uint64_t *s = block_state.snd; + memset(s, 0U, 25U * sizeof (uint64_t)); Hacl_Hash_SHA3_state_t - s = { .block_state = block_state, .buf = buf0, .total_len = (uint64_t)0U }; + s0 = { .block_state = block_state, .buf = buf0, .total_len = (uint64_t)0U }; Hacl_Hash_SHA3_state_t *p = (Hacl_Hash_SHA3_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_SHA3_state_t)); - p[0U] = s; - uint64_t *s1 = block_state.snd; - memset(s1, 0U, 25U * sizeof (uint64_t)); + p[0U] = s0; return p; } @@ -586,19 +580,17 @@ void Hacl_Hash_SHA3_free(Hacl_Hash_SHA3_state_t *state) Hacl_Hash_SHA3_state_t *Hacl_Hash_SHA3_copy(Hacl_Hash_SHA3_state_t *state) { - Hacl_Hash_SHA3_state_t scrut0 = *state; - Hacl_Hash_SHA3_hash_buf block_state0 = scrut0.block_state; - uint8_t *buf0 = scrut0.buf; - uint64_t total_len0 = scrut0.total_len; + Hacl_Hash_SHA3_hash_buf 
block_state0 = (*state).block_state; + uint8_t *buf0 = (*state).buf; + uint64_t total_len0 = (*state).total_len; Spec_Hash_Definitions_hash_alg i = block_state0.fst; KRML_CHECK_SIZE(sizeof (uint8_t), block_len(i)); uint8_t *buf1 = (uint8_t *)KRML_HOST_CALLOC(block_len(i), sizeof (uint8_t)); memcpy(buf1, buf0, block_len(i) * sizeof (uint8_t)); uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); Hacl_Hash_SHA3_hash_buf block_state = { .fst = i, .snd = buf }; - hash_buf2 scrut = { .fst = block_state0, .snd = block_state }; - uint64_t *s_dst = scrut.snd.snd; - uint64_t *s_src = scrut.fst.snd; + uint64_t *s_src = block_state0.snd; + uint64_t *s_dst = block_state.snd; memcpy(s_dst, s_src, 25U * sizeof (uint64_t)); Hacl_Hash_SHA3_state_t s = { .block_state = block_state, .buf = buf1, .total_len = total_len0 }; @@ -610,24 +602,18 @@ Hacl_Hash_SHA3_state_t *Hacl_Hash_SHA3_copy(Hacl_Hash_SHA3_state_t *state) void Hacl_Hash_SHA3_reset(Hacl_Hash_SHA3_state_t *state) { - Hacl_Hash_SHA3_state_t scrut = *state; - uint8_t *buf = scrut.buf; - Hacl_Hash_SHA3_hash_buf block_state = scrut.block_state; - Spec_Hash_Definitions_hash_alg i = block_state.fst; - KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_SHA3_hash_buf block_state = (*state).block_state; uint64_t *s = block_state.snd; memset(s, 0U, 25U * sizeof (uint64_t)); - Hacl_Hash_SHA3_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; } Hacl_Streaming_Types_error_code Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_Hash_SHA3_state_t s = *state; - Hacl_Hash_SHA3_hash_buf block_state = s.block_state; - uint64_t total_len = s.total_len; + Hacl_Hash_SHA3_hash_buf block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; Spec_Hash_Definitions_hash_alg i = block_state.fst; if ((uint64_t)chunk_len > 0xFFFFFFFFFFFFFFFFULL - total_len) { @@ 
-644,10 +630,8 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch } if (chunk_len <= block_len(i) - sz) { - Hacl_Hash_SHA3_state_t s1 = *state; - Hacl_Hash_SHA3_hash_buf block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i) == 0ULL && total_len1 > 0ULL) { @@ -660,16 +644,12 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ((Hacl_Hash_SHA3_state_t){ .block_state = block_state1, .buf = buf, .total_len = total_len2 }); + state->total_len = total_len2; } else if (sz == 0U) { - Hacl_Hash_SHA3_state_t s1 = *state; - Hacl_Hash_SHA3_hash_buf block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i) == 0ULL && total_len1 > 0ULL) { @@ -681,9 +661,9 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch } if (!(sz1 == 0U)) { - Spec_Hash_Definitions_hash_alg a1 = block_state1.fst; - uint64_t *s2 = block_state1.snd; - Hacl_Hash_SHA3_update_multi_sha3(a1, s2, buf, block_len(i) / block_len(a1)); + Spec_Hash_Definitions_hash_alg a1 = block_state.fst; + uint64_t *s1 = block_state.snd; + Hacl_Hash_SHA3_update_multi_sha3(a1, s1, buf, block_len(i) / block_len(a1)); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)block_len(i) == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -699,30 +679,20 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - Spec_Hash_Definitions_hash_alg a1 = 
block_state1.fst; - uint64_t *s2 = block_state1.snd; - Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); + Spec_Hash_Definitions_hash_alg a1 = block_state.fst; + uint64_t *s1 = block_state.snd; + Hacl_Hash_SHA3_update_multi_sha3(a1, s1, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_SHA3_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; } else { uint32_t diff = block_len(i) - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_Hash_SHA3_state_t s1 = *state; - Hacl_Hash_SHA3_hash_buf block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; uint32_t sz10; if (total_len10 % (uint64_t)block_len(i) == 0ULL && total_len10 > 0ULL) { @@ -732,22 +702,12 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch { sz10 = (uint32_t)(total_len10 % (uint64_t)block_len(i)); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_Hash_SHA3_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2 - } - ); - Hacl_Hash_SHA3_state_t s10 = *state; - Hacl_Hash_SHA3_hash_buf block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; + state->total_len = total_len2; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; uint32_t sz1; if (total_len1 % (uint64_t)block_len(i) == 0ULL && total_len1 > 0ULL) { @@ -759,9 +719,9 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch } if (!(sz1 == 0U)) { - Spec_Hash_Definitions_hash_alg a1 = block_state1.fst; - uint64_t *s2 
= block_state1.snd; - Hacl_Hash_SHA3_update_multi_sha3(a1, s2, buf, block_len(i) / block_len(a1)); + Spec_Hash_Definitions_hash_alg a1 = block_state.fst; + uint64_t *s1 = block_state.snd; + Hacl_Hash_SHA3_update_multi_sha3(a1, s1, buf0, block_len(i) / block_len(a1)); } uint32_t ite; if @@ -783,20 +743,12 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - Spec_Hash_Definitions_hash_alg a1 = block_state1.fst; - uint64_t *s2 = block_state1.snd; - Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); - uint8_t *dst = buf; + Spec_Hash_Definitions_hash_alg a1 = block_state.fst; + uint64_t *s1 = block_state.snd; + Hacl_Hash_SHA3_update_multi_sha3(a1, s1, data1, data1_len / block_len(a1)); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_Hash_SHA3_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff) - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); } return Hacl_Streaming_Types_Success; } @@ -809,10 +761,9 @@ digest_( uint32_t l ) { - Hacl_Hash_SHA3_state_t scrut0 = *state; - Hacl_Hash_SHA3_hash_buf block_state = scrut0.block_state; - uint8_t *buf_ = scrut0.buf; - uint64_t total_len = scrut0.total_len; + Hacl_Hash_SHA3_hash_buf block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; uint32_t r; if (total_len % (uint64_t)block_len(a) == 0ULL && total_len > 0ULL) { @@ -825,10 +776,10 @@ digest_( uint8_t *buf_1 = buf_; uint64_t buf[25U] = { 0U }; Hacl_Hash_SHA3_hash_buf tmp_block_state = { .fst = a, .snd = buf }; - hash_buf2 scrut = { .fst = block_state, .snd = tmp_block_state }; - uint64_t *s_dst = scrut.snd.snd; - uint64_t *s_src = scrut.fst.snd; + uint64_t *s_src = block_state.snd; + uint64_t *s_dst = tmp_block_state.snd; 
memcpy(s_dst, s_src, 25U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % block_len(a) == 0U && r > 0U) { @@ -839,7 +790,6 @@ digest_( ite = r % block_len(a); } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; Spec_Hash_Definitions_hash_alg a1 = tmp_block_state.fst; uint64_t *s0 = tmp_block_state.snd; Hacl_Hash_SHA3_update_multi_sha3(a1, s0, buf_multi, 0U / block_len(a1)); @@ -2166,7 +2116,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s) Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -2191,14 +2141,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. 
The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/src/Hacl_Hash_SHA3_Simd256.c b/src/Hacl_Hash_SHA3_Simd256.c index 131c34e6..e0bb7e0b 100644 --- a/src/Hacl_Hash_SHA3_Simd256.c +++ b/src/Hacl_Hash_SHA3_Simd256.c @@ -5992,12 +5992,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -6038,15 +6038,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. 
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -6378,7 +6378,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/src/Hacl_K256_ECDSA.c b/src/Hacl_K256_ECDSA.c index 0b72b166..26138792 100644 --- a/src/Hacl_K256_ECDSA.c +++ b/src/Hacl_K256_ECDSA.c @@ -30,34 +30,32 @@ #include "internal/Hacl_Bignum_K256.h" #include "internal/Hacl_Bignum_Base.h" -static inline uint64_t -bn_add(uint32_t aLen, uint64_t *a, uint32_t bLen, uint64_t *b, uint64_t *res) +static inline uint64_t bn_add_sa(uint32_t aLen, uint32_t bLen, uint64_t *b, uint64_t *res) { - uint64_t *a0 = a; uint64_t *res0 = res; uint64_t c0 = 0ULL; for (uint32_t i = 0U; i < bLen / 4U; i++) { - uint64_t t1 = a0[4U * i]; + uint64_t t1 = res0[4U * i]; uint64_t t20 = b[4U * i]; uint64_t *res_i0 = res0 + 4U * i; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); - uint64_t t10 = a0[4U * i + 1U]; + uint64_t t10 = res0[4U * i + 1U]; uint64_t t21 = b[4U * i + 1U]; uint64_t *res_i1 = res0 + 4U * i + 1U; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); - uint64_t t11 = a0[4U * i + 2U]; + uint64_t t11 = res0[4U * i + 2U]; uint64_t t22 = b[4U * i + 2U]; uint64_t *res_i2 = res0 + 4U * i + 2U; c0 = 
Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); - uint64_t t12 = a0[4U * i + 3U]; + uint64_t t12 = res0[4U * i + 3U]; uint64_t t2 = b[4U * i + 3U]; uint64_t *res_i = res0 + 4U * i + 3U; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); } for (uint32_t i = bLen / 4U * 4U; i < bLen; i++) { - uint64_t t1 = a0[i]; + uint64_t t1 = res0[i]; uint64_t t2 = b[i]; uint64_t *res_i = res0 + i; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t2, res_i); @@ -65,27 +63,26 @@ bn_add(uint32_t aLen, uint64_t *a, uint32_t bLen, uint64_t *b, uint64_t *res) uint64_t c00 = c0; if (bLen < aLen) { - uint64_t *a1 = a + bLen; uint64_t *res1 = res + bLen; uint64_t c = c00; for (uint32_t i = 0U; i < (aLen - bLen) / 4U; i++) { - uint64_t t1 = a1[4U * i]; + uint64_t t1 = res1[4U * i]; uint64_t *res_i0 = res1 + 4U * i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i0); - uint64_t t10 = a1[4U * i + 1U]; + uint64_t t10 = res1[4U * i + 1U]; uint64_t *res_i1 = res1 + 4U * i + 1U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, 0ULL, res_i1); - uint64_t t11 = a1[4U * i + 2U]; + uint64_t t11 = res1[4U * i + 2U]; uint64_t *res_i2 = res1 + 4U * i + 2U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, 0ULL, res_i2); - uint64_t t12 = a1[4U * i + 3U]; + uint64_t t12 = res1[4U * i + 3U]; uint64_t *res_i = res1 + 4U * i + 3U; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, 0ULL, res_i); } for (uint32_t i = (aLen - bLen) / 4U * 4U; i < aLen - bLen; i++) { - uint64_t t1 = a1[i]; + uint64_t t1 = res1[i]; uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, 0ULL, res_i); } @@ -167,8 +164,8 @@ static void add_mod4(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x;); } @@ -221,8 +218,8 @@ static void sub_mod4(uint64_t *n, uint64_t *a, uint64_t *b, uint64_t *res) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (c2 & tmp[i]) | (~c2 & 
res[i]); + uint64_t *os = res; os[i] = x;); } @@ -261,8 +258,8 @@ static void sqr4(uint64_t *a, uint64_t *res) 0U, 4U, 1U, - uint64_t *ab = a; uint64_t a_j = a[i0]; + uint64_t *ab = a; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; i++) @@ -288,7 +285,12 @@ static void sqr4(uint64_t *a, uint64_t *res) } uint64_t r = c; res[i0 + i0] = r;); - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, res, res); + uint64_t a_copy0[8U] = { 0U }; + uint64_t b_copy0[8U] = { 0U }; + memcpy(a_copy0, res, 8U * sizeof (uint64_t)); + memcpy(b_copy0, res, 8U * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint64_t tmp[8U] = { 0U }; KRML_MAYBE_FOR4(i, @@ -300,7 +302,12 @@ static void sqr4(uint64_t *a, uint64_t *res) uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, tmp, res); + uint64_t a_copy[8U] = { 0U }; + uint64_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint64_t)); + memcpy(b_copy, tmp, 8U * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -339,9 +346,9 @@ static inline uint64_t load_qelem_check(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = f; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = f; os[i] = x;); uint64_t is_zero = is_qelem_zero(f); uint64_t acc = 0ULL; @@ -351,7 +358,7 @@ static inline uint64_t load_qelem_check(uint64_t *f, uint8_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(f[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(f[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t is_lt_q = acc; return ~is_zero & is_lt_q; } @@ -362,9 +369,9 @@ static inline bool 
load_qelem_vartime(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = f; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = f; os[i] = x;); bool is_zero = is_qelem_zero_vartime(f); uint64_t a0 = f[0U]; @@ -372,11 +379,7 @@ static inline bool load_qelem_vartime(uint64_t *f, uint8_t *b) uint64_t a2 = f[2U]; uint64_t a3 = f[3U]; bool is_lt_q_b; - if (a3 < 0xffffffffffffffffULL) - { - is_lt_q_b = true; - } - else if (a2 < 0xfffffffffffffffeULL) + if (a3 < 0xffffffffffffffffULL || a2 < 0xfffffffffffffffeULL) { is_lt_q_b = true; } @@ -412,8 +415,8 @@ static inline void modq_short(uint64_t *out, uint64_t *a) 0U, 4U, 1U, - uint64_t *os = out; uint64_t x = (mask & out[i]) | (~mask & a[i]); + uint64_t *os = out; os[i] = x;); } @@ -424,9 +427,9 @@ static inline void load_qelem_modq(uint64_t *f, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = f; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = f; os[i] = x;); memcpy(tmp, f, 4U * sizeof (uint64_t)); modq_short(f, tmp); @@ -494,8 +497,8 @@ mul_pow2_256_minus_q_add( uint64_t r = c; tmp[len + i0] = r;); memcpy(res + 2U, a, len * sizeof (uint64_t)); - bn_add(resLen, res, len + 2U, tmp, res); - uint64_t c = bn_add(resLen, res, 4U, e, res); + bn_add_sa(resLen, len + 2U, tmp, res); + uint64_t c = bn_add_sa(resLen, 4U, e, res); return c; } @@ -510,18 +513,26 @@ static inline void modq(uint64_t *out, uint64_t *a) uint64_t *t01 = tmp; uint64_t m[7U] = { 0U }; uint64_t p[5U] = { 0U }; - mul_pow2_256_minus_q_add(4U, 7U, t01, a + 4U, a, m); - mul_pow2_256_minus_q_add(3U, 5U, t01, m + 4U, m, p); - uint64_t c2 = mul_pow2_256_minus_q_add(1U, 4U, t01, p + 4U, p, r); - uint64_t c0 = c2; + uint64_t *a0 = a; + uint64_t *a1 = a + 4U; + uint64_t c0 = mul_pow2_256_minus_q_add(4U, 7U, t01, a1, a0, m); + KRML_MAYBE_UNUSED_VAR(c0); + uint64_t *m0 = m; + uint64_t *m1 = m + 4U; + uint64_t c10 = mul_pow2_256_minus_q_add(3U, 5U, t01, m1, m0, p); + KRML_MAYBE_UNUSED_VAR(c10); + uint64_t *p0 = p; + 
uint64_t *p1 = p + 4U; + uint64_t c2 = mul_pow2_256_minus_q_add(1U, 4U, t01, p1, p0, r); + uint64_t c00 = c2; uint64_t c1 = add4(r, tmp, out); - uint64_t mask = 0ULL - (c0 + c1); + uint64_t mask = 0ULL - (c00 + c1); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint64_t *os = out; uint64_t x = (mask & out[i]) | (~mask & r[i]); + uint64_t *os = out; os[i] = x;); } @@ -549,7 +560,9 @@ static inline void qnegate_conditional_vartime(uint64_t *f, bool is_negate) uint64_t zero[4U] = { 0U }; if (is_negate) { - sub_mod4(n, zero, f, f); + uint64_t b_copy[4U] = { 0U }; + memcpy(b_copy, f, 4U * sizeof (uint64_t)); + sub_mod4(n, zero, b_copy, f); } } @@ -567,11 +580,7 @@ static inline bool is_qelem_le_q_halved_vartime(uint64_t *f) { return false; } - if (a2 < 0xffffffffffffffffULL) - { - return true; - } - if (a1 < 0x5d576e7357a4501dULL) + if (a2 < 0xffffffffffffffffULL || a1 < 0x5d576e7357a4501dULL) { return true; } @@ -607,8 +616,8 @@ static inline void qmul_shift_384(uint64_t *res, uint64_t *a, uint64_t *b) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x = (mask & res[i]) | (~mask & res_b_padded[i]); + uint64_t *os = res; os[i] = x;); } @@ -616,7 +625,9 @@ static inline void qsquare_times_in_place(uint64_t *out, uint32_t b) { for (uint32_t i = 0U; i < b; i++) { - qsqr(out, out); + uint64_t f_copy[4U] = { 0U }; + memcpy(f_copy, out, 4U * sizeof (uint64_t)); + qsqr(out, f_copy); } } @@ -625,7 +636,9 @@ static inline void qsquare_times(uint64_t *out, uint64_t *a, uint32_t b) memcpy(out, a, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < b; i++) { - qsqr(out, out); + uint64_t f_copy[4U] = { 0U }; + memcpy(f_copy, out, 4U * sizeof (uint64_t)); + qsqr(out, f_copy); } } @@ -649,68 +662,130 @@ static inline void qinv(uint64_t *out, uint64_t *f) uint64_t x8[4U] = { 0U }; uint64_t x14[4U] = { 0U }; qsquare_times(x6, x_1101, 2U); - qmul(x6, x6, x_1011); + uint64_t f1_copy0[4U] = { 0U }; + memcpy(f1_copy0, x6, 4U * sizeof (uint64_t)); + qmul(x6, f1_copy0, x_1011); qsquare_times(x8, x6, 2U); - 
qmul(x8, x8, x_11); + uint64_t f1_copy1[4U] = { 0U }; + memcpy(f1_copy1, x8, 4U * sizeof (uint64_t)); + qmul(x8, f1_copy1, x_11); qsquare_times(x14, x8, 6U); - qmul(x14, x14, x6); + uint64_t f1_copy2[4U] = { 0U }; + memcpy(f1_copy2, x14, 4U * sizeof (uint64_t)); + qmul(x14, f1_copy2, x6); uint64_t x56[4U] = { 0U }; qsquare_times(out, x14, 14U); - qmul(out, out, x14); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy, x14); qsquare_times(x56, out, 28U); - qmul(x56, x56, out); + uint64_t f1_copy3[4U] = { 0U }; + memcpy(f1_copy3, x56, 4U * sizeof (uint64_t)); + qmul(x56, f1_copy3, out); qsquare_times(out, x56, 56U); - qmul(out, out, x56); + uint64_t f1_copy4[4U] = { 0U }; + memcpy(f1_copy4, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy4, x56); qsquare_times_in_place(out, 14U); - qmul(out, out, x14); + uint64_t f1_copy5[4U] = { 0U }; + memcpy(f1_copy5, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy5, x14); qsquare_times_in_place(out, 3U); - qmul(out, out, x_101); + uint64_t f1_copy6[4U] = { 0U }; + memcpy(f1_copy6, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy6, x_101); qsquare_times_in_place(out, 4U); - qmul(out, out, x_111); + uint64_t f1_copy7[4U] = { 0U }; + memcpy(f1_copy7, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy7, x_111); qsquare_times_in_place(out, 4U); - qmul(out, out, x_101); + uint64_t f1_copy8[4U] = { 0U }; + memcpy(f1_copy8, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy8, x_101); qsquare_times_in_place(out, 5U); - qmul(out, out, x_1011); + uint64_t f1_copy9[4U] = { 0U }; + memcpy(f1_copy9, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy9, x_1011); qsquare_times_in_place(out, 4U); - qmul(out, out, x_1011); + uint64_t f1_copy10[4U] = { 0U }; + memcpy(f1_copy10, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy10, x_1011); qsquare_times_in_place(out, 4U); - qmul(out, out, x_111); + uint64_t f1_copy11[4U] = { 0U }; + memcpy(f1_copy11, out, 4U * sizeof (uint64_t)); + qmul(out, 
f1_copy11, x_111); qsquare_times_in_place(out, 5U); - qmul(out, out, x_111); + uint64_t f1_copy12[4U] = { 0U }; + memcpy(f1_copy12, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy12, x_111); qsquare_times_in_place(out, 6U); - qmul(out, out, x_1101); + uint64_t f1_copy13[4U] = { 0U }; + memcpy(f1_copy13, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy13, x_1101); qsquare_times_in_place(out, 4U); - qmul(out, out, x_101); + uint64_t f1_copy14[4U] = { 0U }; + memcpy(f1_copy14, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy14, x_101); qsquare_times_in_place(out, 3U); - qmul(out, out, x_111); + uint64_t f1_copy15[4U] = { 0U }; + memcpy(f1_copy15, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy15, x_111); qsquare_times_in_place(out, 5U); - qmul(out, out, x_1001); + uint64_t f1_copy16[4U] = { 0U }; + memcpy(f1_copy16, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy16, x_1001); qsquare_times_in_place(out, 6U); - qmul(out, out, x_101); + uint64_t f1_copy17[4U] = { 0U }; + memcpy(f1_copy17, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy17, x_101); qsquare_times_in_place(out, 10U); - qmul(out, out, x_111); + uint64_t f1_copy18[4U] = { 0U }; + memcpy(f1_copy18, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy18, x_111); qsquare_times_in_place(out, 4U); - qmul(out, out, x_111); + uint64_t f1_copy19[4U] = { 0U }; + memcpy(f1_copy19, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy19, x_111); qsquare_times_in_place(out, 9U); - qmul(out, out, x8); + uint64_t f1_copy20[4U] = { 0U }; + memcpy(f1_copy20, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy20, x8); qsquare_times_in_place(out, 5U); - qmul(out, out, x_1001); + uint64_t f1_copy21[4U] = { 0U }; + memcpy(f1_copy21, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy21, x_1001); qsquare_times_in_place(out, 6U); - qmul(out, out, x_1011); + uint64_t f1_copy22[4U] = { 0U }; + memcpy(f1_copy22, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy22, x_1011); qsquare_times_in_place(out, 4U); - qmul(out, out, x_1101); + 
uint64_t f1_copy23[4U] = { 0U }; + memcpy(f1_copy23, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy23, x_1101); qsquare_times_in_place(out, 5U); - qmul(out, out, x_11); + uint64_t f1_copy24[4U] = { 0U }; + memcpy(f1_copy24, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy24, x_11); qsquare_times_in_place(out, 6U); - qmul(out, out, x_1101); + uint64_t f1_copy25[4U] = { 0U }; + memcpy(f1_copy25, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy25, x_1101); qsquare_times_in_place(out, 10U); - qmul(out, out, x_1101); + uint64_t f1_copy26[4U] = { 0U }; + memcpy(f1_copy26, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy26, x_1101); qsquare_times_in_place(out, 4U); - qmul(out, out, x_1001); + uint64_t f1_copy27[4U] = { 0U }; + memcpy(f1_copy27, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy27, x_1001); qsquare_times_in_place(out, 6U); - qmul(out, out, f); + uint64_t f1_copy28[4U] = { 0U }; + memcpy(f1_copy28, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy28, f); qsquare_times_in_place(out, 8U); - qmul(out, out, x6); + uint64_t f1_copy29[4U] = { 0U }; + memcpy(f1_copy29, out, 4U * sizeof (uint64_t)); + qmul(out, f1_copy29, x6); } void Hacl_Impl_K256_Point_make_point_at_inf(uint64_t *p) @@ -735,8 +810,12 @@ static inline void to_aff_point(uint64_t *p_aff, uint64_t *p) Hacl_Impl_K256_Finv_finv(zinv, z1); Hacl_K256_Field_fmul(x, x1, zinv); Hacl_K256_Field_fmul(y, y1, zinv); - Hacl_K256_Field_fnormalize(x, x); - Hacl_K256_Field_fnormalize(y, y); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, x, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(x, f_copy); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y, f_copy0); } static inline void to_aff_point_x(uint64_t *x, uint64_t *p) @@ -746,7 +825,9 @@ static inline void to_aff_point_x(uint64_t *x, uint64_t *p) uint64_t zinv[5U] = { 0U }; Hacl_Impl_K256_Finv_finv(zinv, z1); Hacl_K256_Field_fmul(x, x1, zinv); - Hacl_K256_Field_fnormalize(x, x); + uint64_t 
f_copy[5U] = { 0U }; + memcpy(f_copy, x, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(x, f_copy); } static inline bool is_on_curve_vartime(uint64_t *p) @@ -761,12 +842,20 @@ static inline bool is_on_curve_vartime(uint64_t *p) b[3U] = 0ULL; b[4U] = 0ULL; Hacl_K256_Field_fsqr(y2_exp, x); - Hacl_K256_Field_fmul(y2_exp, y2_exp, x); - Hacl_K256_Field_fadd(y2_exp, y2_exp, b); - Hacl_K256_Field_fnormalize(y2_exp, y2_exp); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, y2_exp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(y2_exp, f1_copy, x); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, y2_exp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y2_exp, f1_copy0, b); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y2_exp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2_exp, f_copy0); uint64_t y2_comp[5U] = { 0U }; Hacl_K256_Field_fsqr(y2_comp, y); - Hacl_K256_Field_fnormalize(y2_comp, y2_comp); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, y2_comp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2_comp, f_copy); bool res = Hacl_K256_Field_is_felem_eq_vartime(y2_exp, y2_comp); bool res0 = res; return res0; @@ -810,14 +899,18 @@ void Hacl_Impl_K256_Point_point_negate(uint64_t *out, uint64_t *p) oy[2U] = f2; oy[3U] = f3; oy[4U] = f4; - Hacl_K256_Field_fnormalize_weak(oy, oy); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, oy, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(oy, f_copy); } static inline void point_negate_conditional_vartime(uint64_t *p, bool is_negate) { if (is_negate) { - Hacl_Impl_K256_Point_point_negate(p, p); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, p, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_Point_point_negate(p, p_copy); return; } } @@ -894,14 +987,24 @@ static inline bool aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_ b[3U] = 0ULL; b[4U] = 0ULL; Hacl_K256_Field_fsqr(y2, x); - Hacl_K256_Field_fmul(y2, y2, x); - Hacl_K256_Field_fadd(y2, y2, b); - Hacl_K256_Field_fnormalize(y2, 
y2); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, y2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(y2, f1_copy, x); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, y2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y2, f1_copy0, b); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, y2, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2, f_copy0); Hacl_Impl_K256_Finv_fsqrt(y, y2); - Hacl_K256_Field_fnormalize(y, y); + uint64_t f_copy1[5U] = { 0U }; + memcpy(f_copy1, y, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y, f_copy1); uint64_t y2_comp[5U] = { 0U }; Hacl_K256_Field_fsqr(y2_comp, y); - Hacl_K256_Field_fnormalize(y2_comp, y2_comp); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, y2_comp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(y2_comp, f_copy); bool res = Hacl_K256_Field_is_felem_eq_vartime(y2, y2_comp); bool is_y_valid = res; bool is_y_valid0 = is_y_valid; @@ -932,22 +1035,42 @@ void Hacl_Impl_K256_PointDouble_point_double(uint64_t *out, uint64_t *p) Hacl_K256_Field_fsqr(yy, y1); Hacl_K256_Field_fsqr(zz, z1); Hacl_K256_Field_fmul_small_num(x3, x1, 2ULL); - Hacl_K256_Field_fmul(x3, x3, y1); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy, y1); Hacl_K256_Field_fmul(tmp1, yy, y1); Hacl_K256_Field_fmul(z3, tmp1, z1); - Hacl_K256_Field_fmul_small_num(z3, z3, 8ULL); - Hacl_K256_Field_fnormalize_weak(z3, z3); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul_small_num(z3, f_copy, 8ULL); + uint64_t f_copy1[5U] = { 0U }; + memcpy(f_copy1, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(z3, f_copy1); Hacl_K256_Field_fmul_small_num(bzz3, zz, 21ULL); - Hacl_K256_Field_fnormalize_weak(bzz3, bzz3); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, bzz3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(bzz3, f_copy0); Hacl_K256_Field_fmul_small_num(bzz9, bzz3, 3ULL); - 
Hacl_K256_Field_fsub(bzz9, yy, bzz9, 6ULL); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, bzz9, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsub(bzz9, yy, f2_copy, 6ULL); Hacl_K256_Field_fadd(tmp1, yy, bzz3); - Hacl_K256_Field_fmul(tmp1, bzz9, tmp1); + uint64_t f2_copy0[5U] = { 0U }; + memcpy(f2_copy0, tmp1, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(tmp1, bzz9, f2_copy0); Hacl_K256_Field_fmul(y3, yy, zz); - Hacl_K256_Field_fmul(x3, x3, bzz9); - Hacl_K256_Field_fmul_small_num(y3, y3, 168ULL); - Hacl_K256_Field_fadd(y3, tmp1, y3); - Hacl_K256_Field_fnormalize_weak(y3, y3); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy0, bzz9); + uint64_t f_copy2[5U] = { 0U }; + memcpy(f_copy2, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul_small_num(y3, f_copy2, 168ULL); + uint64_t f2_copy1[5U] = { 0U }; + memcpy(f2_copy1, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y3, tmp1, f2_copy1); + uint64_t f_copy3[5U] = { 0U }; + memcpy(f_copy3, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(y3, f_copy3); } void Hacl_Impl_K256_PointAdd_point_add(uint64_t *out, uint64_t *p, uint64_t *q) @@ -976,40 +1099,76 @@ void Hacl_Impl_K256_PointAdd_point_add(uint64_t *out, uint64_t *p, uint64_t *q) Hacl_K256_Field_fmul(zz, z1, z2); Hacl_K256_Field_fadd(xy_pairs, x1, y1); Hacl_K256_Field_fadd(tmp1, x2, y2); - Hacl_K256_Field_fmul(xy_pairs, xy_pairs, tmp1); + uint64_t f1_copy[5U] = { 0U }; + memcpy(f1_copy, xy_pairs, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(xy_pairs, f1_copy, tmp1); Hacl_K256_Field_fadd(tmp1, xx, yy); - Hacl_K256_Field_fsub(xy_pairs, xy_pairs, tmp1, 4ULL); + uint64_t f1_copy0[5U] = { 0U }; + memcpy(f1_copy0, xy_pairs, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsub(xy_pairs, f1_copy0, tmp1, 4ULL); Hacl_K256_Field_fadd(yz_pairs, y1, z1); Hacl_K256_Field_fadd(tmp1, y2, z2); - Hacl_K256_Field_fmul(yz_pairs, yz_pairs, tmp1); + uint64_t f1_copy1[5U] = { 0U }; + memcpy(f1_copy1, 
yz_pairs, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(yz_pairs, f1_copy1, tmp1); Hacl_K256_Field_fadd(tmp1, yy, zz); - Hacl_K256_Field_fsub(yz_pairs, yz_pairs, tmp1, 4ULL); + uint64_t f1_copy2[5U] = { 0U }; + memcpy(f1_copy2, yz_pairs, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsub(yz_pairs, f1_copy2, tmp1, 4ULL); Hacl_K256_Field_fadd(xz_pairs, x1, z1); Hacl_K256_Field_fadd(tmp1, x2, z2); - Hacl_K256_Field_fmul(xz_pairs, xz_pairs, tmp1); + uint64_t f1_copy3[5U] = { 0U }; + memcpy(f1_copy3, xz_pairs, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(xz_pairs, f1_copy3, tmp1); Hacl_K256_Field_fadd(tmp1, xx, zz); - Hacl_K256_Field_fsub(xz_pairs, xz_pairs, tmp1, 4ULL); + uint64_t f1_copy4[5U] = { 0U }; + memcpy(f1_copy4, xz_pairs, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fsub(xz_pairs, f1_copy4, tmp1, 4ULL); Hacl_K256_Field_fmul_small_num(tmp1, zz, 21ULL); - Hacl_K256_Field_fnormalize_weak(tmp1, tmp1); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, tmp1, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(tmp1, f_copy); Hacl_K256_Field_fsub(yy_m_bzz3, yy, tmp1, 2ULL); Hacl_K256_Field_fadd(yy_p_bzz3, yy, tmp1); Hacl_K256_Field_fmul_small_num(x3, yz_pairs, 21ULL); - Hacl_K256_Field_fnormalize_weak(x3, x3); + uint64_t f_copy0[5U] = { 0U }; + memcpy(f_copy0, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(x3, f_copy0); Hacl_K256_Field_fmul_small_num(z3, xx, 3ULL); Hacl_K256_Field_fmul_small_num(y3, z3, 21ULL); - Hacl_K256_Field_fnormalize_weak(y3, y3); + uint64_t f_copy1[5U] = { 0U }; + memcpy(f_copy1, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(y3, f_copy1); Hacl_K256_Field_fmul(tmp1, xy_pairs, yy_m_bzz3); - Hacl_K256_Field_fmul(x3, x3, xz_pairs); - Hacl_K256_Field_fsub(x3, tmp1, x3, 2ULL); - Hacl_K256_Field_fnormalize_weak(x3, x3); + uint64_t f1_copy5[5U] = { 0U }; + memcpy(f1_copy5, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(x3, f1_copy5, xz_pairs); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, x3, 5U * 
sizeof (uint64_t)); + Hacl_K256_Field_fsub(x3, tmp1, f2_copy, 2ULL); + uint64_t f_copy2[5U] = { 0U }; + memcpy(f_copy2, x3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(x3, f_copy2); Hacl_K256_Field_fmul(tmp1, yy_p_bzz3, yy_m_bzz3); - Hacl_K256_Field_fmul(y3, y3, xz_pairs); - Hacl_K256_Field_fadd(y3, tmp1, y3); - Hacl_K256_Field_fnormalize_weak(y3, y3); + uint64_t f1_copy6[5U] = { 0U }; + memcpy(f1_copy6, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(y3, f1_copy6, xz_pairs); + uint64_t f2_copy0[5U] = { 0U }; + memcpy(f2_copy0, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(y3, tmp1, f2_copy0); + uint64_t f_copy3[5U] = { 0U }; + memcpy(f_copy3, y3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(y3, f_copy3); Hacl_K256_Field_fmul(tmp1, yz_pairs, yy_p_bzz3); - Hacl_K256_Field_fmul(z3, z3, xy_pairs); - Hacl_K256_Field_fadd(z3, tmp1, z3); - Hacl_K256_Field_fnormalize_weak(z3, z3); + uint64_t f1_copy7[5U] = { 0U }; + memcpy(f1_copy7, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(z3, f1_copy7, xy_pairs); + uint64_t f2_copy1[5U] = { 0U }; + memcpy(f2_copy1, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(z3, tmp1, f2_copy1); + uint64_t f_copy4[5U] = { 0U }; + memcpy(f_copy4, z3, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize_weak(z3, f_copy4); } static inline void scalar_split_lambda(uint64_t *r1, uint64_t *r2, uint64_t *k) @@ -1034,13 +1193,19 @@ static inline void scalar_split_lambda(uint64_t *r1, uint64_t *r2, uint64_t *k) tmp2[1U] = 0x8a280ac50774346dULL; tmp2[2U] = 0xfffffffffffffffeULL; tmp2[3U] = 0xffffffffffffffffULL; - qmul(r1, r1, tmp1); - qmul(r2, r2, tmp2); + uint64_t f1_copy[4U] = { 0U }; + memcpy(f1_copy, r1, 4U * sizeof (uint64_t)); + qmul(r1, f1_copy, tmp1); + uint64_t f1_copy0[4U] = { 0U }; + memcpy(f1_copy0, r2, 4U * sizeof (uint64_t)); + qmul(r2, f1_copy0, tmp2); tmp1[0U] = 0xe0cfc810b51283cfULL; tmp1[1U] = 0xa880b9fc8ec739c2ULL; tmp1[2U] = 0x5ad9e3fd77ed9ba4ULL; tmp1[3U] = 0xac9c52b33fa3cf1fULL; - 
qadd(r2, r1, r2); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, r2, 4U * sizeof (uint64_t)); + qadd(r2, r1, f2_copy); qmul(tmp2, r2, tmp1); qadd(r1, k, tmp2); } @@ -1081,7 +1246,9 @@ static inline void point_mul_lambda_inplace(uint64_t *res) beta[2U] = 0xc3434e99cf049ULL; beta[3U] = 0x7106e64479eaULL; beta[4U] = 0x7ae96a2b657cULL; - Hacl_K256_Field_fmul(rx, beta, rx); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, rx, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fmul(rx, beta, f2_copy); } typedef struct __bool_bool_s @@ -1123,23 +1290,35 @@ void Hacl_Impl_K256_PointMul_point_mul(uint64_t *out, uint64_t *scalar, uint64_t uint64_t *t1 = table + 15U; Hacl_Impl_K256_Point_make_point_at_inf(t0); memcpy(t1, q, 15U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 15U; - Hacl_Impl_K256_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, t11, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(tmp, p_copy0); memcpy(table + (2U * i + 2U) * 15U, tmp, 15U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 15U; - Hacl_Impl_K256_PointAdd_point_add(tmp, q, t2); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, q, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp, p_copy, t2); memcpy(table + (2U * i + 3U) * 15U, tmp, 15U * sizeof (uint64_t));); Hacl_Impl_K256_Point_make_point_at_inf(out); uint64_t tmp0[15U] = { 0U }; for (uint32_t i0 = 0U; i0 < 64U; i0++) { - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy);); uint32_t k = 256U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 15U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 
0U, @@ -1151,10 +1330,12 @@ void Hacl_Impl_K256_PointMul_point_mul(uint64_t *out, uint64_t *scalar, uint64_t 0U, 15U, 1U, - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x;);); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp0); } } @@ -1171,8 +1352,8 @@ static inline void precomp_get_consttime(const uint64_t *table, uint64_t bits_l, 0U, 15U, 1U, - uint64_t *os = tmp; uint64_t x = (c & res_j[i]) | (~c & tmp[i]); + uint64_t *os = tmp; os[i] = x;);); } @@ -1231,23 +1412,41 @@ static inline void point_mul_g(uint64_t *out, uint64_t *scalar) 0U, 16U, 1U, - KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR4(i0, + 0U, + 4U, + 1U, + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy);); uint32_t k = 64U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r4, k, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_g_pow2_192_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp); uint32_t k0 = 64U - 4U * i - 4U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r3, k0, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_g_pow2_128_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy0, tmp); uint32_t k1 = 64U - 4U * i - 4U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, 
r2, k1, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_g_pow2_64_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp); + uint64_t p_copy1[15U] = { 0U }; + memcpy(p_copy1, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy1, tmp); uint32_t k2 = 64U - 4U * i - 4U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r1, k2, 4U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w4); precomp_get_consttime(Hacl_K256_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp);); + uint64_t p_copy2[15U] = { 0U }; + memcpy(p_copy2, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy2, tmp);); } static inline void @@ -1275,15 +1474,20 @@ point_mul_g_double_vartime(uint64_t *out, uint64_t *scalar1, uint64_t *scalar2, uint64_t *t1 = table2 + 15U; Hacl_Impl_K256_Point_make_point_at_inf(t0); memcpy(t1, q2, 15U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 15U; - Hacl_Impl_K256_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, t11, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(tmp, p_copy0); memcpy(table2 + (2U * i + 2U) * 15U, tmp, 15U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 15U; - Hacl_Impl_K256_PointAdd_point_add(tmp, q2, t2); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, q2, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 15U, tmp, 15U * sizeof (uint64_t));); uint64_t tmp0[15U] = { 0U }; uint32_t i0 = 255U; @@ -1296,25 +1500,39 @@ point_mul_g_double_vartime(uint64_t *out, uint64_t *scalar1, uint64_t *scalar2, uint32_t bits_l320 = (uint32_t)bits_c0; const uint64_t *a_bits_l0 = table2 + bits_l320 * 15U; memcpy(tmp0, (uint64_t 
*)a_bits_l0, 15U * sizeof (uint64_t)); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp0); uint64_t tmp1[15U] = { 0U }; for (uint32_t i = 0U; i < 51U; i++) { - KRML_MAYBE_FOR5(i2, 0U, 5U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy0);); uint32_t k = 255U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar2, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l321 = (uint32_t)bits_l; const uint64_t *a_bits_l1 = table2 + bits_l321 * 15U; memcpy(tmp1, (uint64_t *)a_bits_l1, 15U * sizeof (uint64_t)); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp1); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy0, tmp1); uint32_t k0 = 255U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar1, k0, 5U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l322 = (uint32_t)bits_l0; const uint64_t *a_bits_l2 = Hacl_K256_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * 15U; memcpy(tmp1, (uint64_t *)a_bits_l2, 15U * sizeof (uint64_t)); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp1); + uint64_t p_copy1[15U] = { 0U }; + memcpy(p_copy1, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy1, tmp1); } } @@ -1338,15 +1556,20 @@ point_mul_g_double_split_lambda_table( uint64_t *t1 = table2 + 15U; Hacl_Impl_K256_Point_make_point_at_inf(t0); memcpy(t1, p2, 15U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 15U; - Hacl_Impl_K256_PointDouble_point_double(tmp, t11); + uint64_t p_copy0[15U] = { 0U }; + 
memcpy(p_copy0, t11, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(tmp, p_copy0); memcpy(table2 + (2U * i + 2U) * 15U, tmp, 15U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 15U; - Hacl_Impl_K256_PointAdd_point_add(tmp, p2, t2); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, p2, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 15U, tmp, 15U * sizeof (uint64_t));); uint64_t tmp0[15U] = { 0U }; uint64_t tmp1[15U] = { 0U }; @@ -1365,7 +1588,9 @@ point_mul_g_double_split_lambda_table( memcpy(tmp1, (uint64_t *)a_bits_l0, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp1, is_negate2); point_mul_lambda_inplace(tmp1); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp1); + uint64_t p_copy[15U] = { 0U }; + memcpy(p_copy, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy, tmp1); uint64_t tmp10[15U] = { 0U }; uint32_t i2 = 125U; uint64_t bits_c1 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r3, i2, 5U); @@ -1380,29 +1605,46 @@ point_mul_g_double_split_lambda_table( memcpy(tmp10, (uint64_t *)a_bits_l2, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp10, is_negate4); point_mul_lambda_inplace(tmp10); - Hacl_Impl_K256_PointAdd_point_add(tmp0, tmp0, tmp10); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp0); + uint64_t p_copy0[15U] = { 0U }; + memcpy(p_copy0, tmp0, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(tmp0, p_copy0, tmp10); + uint64_t p_copy1[15U] = { 0U }; + memcpy(p_copy1, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy1, tmp0); uint64_t tmp2[15U] = { 0U }; for (uint32_t i = 0U; i < 25U; i++) { - KRML_MAYBE_FOR5(i4, 0U, 5U, 1U, Hacl_Impl_K256_PointDouble_point_double(out, out);); + KRML_MAYBE_FOR5(i4, + 0U, + 5U, + 1U, + uint64_t p_copy2[15U] = { 0U }; + memcpy(p_copy2, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointDouble_point_double(out, p_copy2);); uint32_t k = 
125U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r4, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l323 = (uint32_t)bits_l; const uint64_t *a_bits_l3 = table2 + bits_l323 * 15U; memcpy(tmp2, (uint64_t *)a_bits_l3, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate4); point_mul_lambda_inplace(tmp2); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy2[15U] = { 0U }; + memcpy(p_copy2, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy2, tmp2); uint32_t k0 = 125U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r3, k0, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l324 = (uint32_t)bits_l0; const uint64_t *a_bits_l4 = table2 + bits_l324 * 15U; memcpy(tmp2, (uint64_t *)a_bits_l4, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate3); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy3[15U] = { 0U }; + memcpy(p_copy3, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy3, tmp2); uint32_t k1 = 125U - 5U * i - 5U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r2, k1, 5U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l325 = (uint32_t)bits_l1; const uint64_t @@ -1410,16 +1652,21 @@ point_mul_g_double_split_lambda_table( memcpy(tmp2, (uint64_t *)a_bits_l5, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate2); point_mul_lambda_inplace(tmp2); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy4[15U] = { 0U }; + memcpy(p_copy4, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy4, tmp2); uint32_t k2 = 125U - 5U * i - 5U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, r1, k2, 5U); + KRML_HOST_IGNORE(Hacl_K256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l326 = (uint32_t)bits_l2; const uint64_t *a_bits_l6 = 
Hacl_K256_PrecompTable_precomp_basepoint_table_w5 + bits_l326 * 15U; memcpy(tmp2, (uint64_t *)a_bits_l6, 15U * sizeof (uint64_t)); point_negate_conditional_vartime(tmp2, is_negate1); - Hacl_Impl_K256_PointAdd_point_add(out, out, tmp2); + uint64_t p_copy5[15U] = { 0U }; + memcpy(p_copy5, out, 15U * sizeof (uint64_t)); + Hacl_Impl_K256_PointAdd_point_add(out, p_copy5, tmp2); } } @@ -1520,7 +1767,9 @@ static inline bool fmul_eq_vartime(uint64_t *r, uint64_t *z, uint64_t *x) { uint64_t tmp[5U] = { 0U }; Hacl_K256_Field_fmul(tmp, r, z); - Hacl_K256_Field_fnormalize(tmp, tmp); + uint64_t f_copy[5U] = { 0U }; + memcpy(f_copy, tmp, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fnormalize(tmp, f_copy); bool b = Hacl_K256_Field_is_felem_eq_vartime(tmp, x); return b; } @@ -1573,9 +1822,9 @@ Hacl_K256_ECDSA_ecdsa_sign_hashed_msg( 0U, 4U, 1U, - uint64_t *os = d_a; uint64_t uu____0 = oneq10[i]; uint64_t x = uu____0 ^ (is_b_valid0 & (d_a[i] ^ uu____0)); + uint64_t *os = d_a; os[i] = x;); uint64_t is_sk_valid = is_b_valid0; uint64_t is_b_valid = load_qelem_check(k_q, nonce); @@ -1584,9 +1833,9 @@ Hacl_K256_ECDSA_ecdsa_sign_hashed_msg( 0U, 4U, 1U, - uint64_t *os = k_q; uint64_t uu____1 = oneq1[i]; uint64_t x = uu____1 ^ (is_b_valid & (k_q[i] ^ uu____1)); + uint64_t *os = k_q; os[i] = x;); uint64_t is_nonce_valid = is_b_valid; uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid; @@ -1602,8 +1851,12 @@ Hacl_K256_ECDSA_ecdsa_sign_hashed_msg( load_qelem_modq(z, msgHash); qinv(kinv, k_q); qmul(s_q, r_q, d_a); - qadd(s_q, z, s_q); - qmul(s_q, kinv, s_q); + uint64_t f2_copy[4U] = { 0U }; + memcpy(f2_copy, s_q, 4U * sizeof (uint64_t)); + qadd(s_q, z, f2_copy); + uint64_t f2_copy0[4U] = { 0U }; + memcpy(f2_copy0, s_q, 4U * sizeof (uint64_t)); + qmul(s_q, kinv, f2_copy0); store_qelem(signature, r_q); store_qelem(signature + 32U, s_q); uint64_t is_r_zero = is_qelem_zero(r_q); @@ -1706,7 +1959,9 @@ Hacl_K256_ECDSA_ecdsa_verify_hashed_msg(uint8_t *m, uint8_t *public_key, uint8_t tmp_q[2U] = 
0xffffffebaaedcULL; tmp_q[3U] = 0xfffffffffffffULL; tmp_q[4U] = 0xffffffffffffULL; - Hacl_K256_Field_fadd(tmp_q, r_fe, tmp_q); + uint64_t f2_copy[5U] = { 0U }; + memcpy(f2_copy, tmp_q, 5U * sizeof (uint64_t)); + Hacl_K256_Field_fadd(tmp_q, r_fe, f2_copy); return fmul_eq_vartime(tmp_q, z, tmp_x); } return false; @@ -1952,8 +2207,8 @@ bool Hacl_K256_ECDSA_public_key_compressed_to_raw(uint8_t *pk_raw, uint8_t *pk) { uint64_t xa[5U] = { 0U }; uint64_t ya[5U] = { 0U }; - uint8_t *pk_xb = pk + 1U; bool b = aff_point_decompress_vartime(xa, ya, pk); + uint8_t *pk_xb = pk + 1U; if (b) { memcpy(pk_raw, pk_xb, 32U * sizeof (uint8_t)); @@ -2059,9 +2314,9 @@ bool Hacl_K256_ECDSA_secret_to_public(uint8_t *public_key, uint8_t *private_key) 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; point_mul_g(pk, sk); @@ -2094,9 +2349,9 @@ bool Hacl_K256_ECDSA_ecdh(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; if (is_pk_valid) diff --git a/src/Hacl_MAC_Poly1305.c b/src/Hacl_MAC_Poly1305.c index 28cbca5a..a3816bfa 100644 --- a/src/Hacl_MAC_Poly1305.c +++ b/src/Hacl_MAC_Poly1305.c @@ -445,6 +445,7 @@ Hacl_MAC_Poly1305_state_t *Hacl_MAC_Poly1305_malloc(uint8_t *key) uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); uint64_t *r1 = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); uint64_t *block_state = r1; + Hacl_MAC_Poly1305_poly1305_init(block_state, key); uint8_t *k_ = (uint8_t *)KRML_HOST_CALLOC(32U, sizeof (uint8_t)); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_0 = k_; @@ -453,22 +454,19 @@ Hacl_MAC_Poly1305_state_t *Hacl_MAC_Poly1305_malloc(uint8_t *key) Hacl_MAC_Poly1305_state_t *p = (Hacl_MAC_Poly1305_state_t 
*)KRML_HOST_MALLOC(sizeof (Hacl_MAC_Poly1305_state_t)); p[0U] = s; - Hacl_MAC_Poly1305_poly1305_init(block_state, key); return p; } void Hacl_MAC_Poly1305_reset(Hacl_MAC_Poly1305_state_t *state, uint8_t *key) { - Hacl_MAC_Poly1305_state_t scrut = *state; - uint8_t *k_ = scrut.p_key; - uint8_t *buf = scrut.buf; - uint64_t *block_state = scrut.block_state; + uint64_t *block_state = (*state).block_state; + uint8_t *k_ = (*state).p_key; Hacl_MAC_Poly1305_poly1305_init(block_state, key); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_1 = k_; - Hacl_MAC_Poly1305_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U, .p_key = k_1 }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; + state->p_key = k_1; } /** @@ -477,8 +475,8 @@ void Hacl_MAC_Poly1305_reset(Hacl_MAC_Poly1305_state_t *state, uint8_t *key) Hacl_Streaming_Types_error_code Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint32_t chunk_len) { - Hacl_MAC_Poly1305_state_t s = *state; - uint64_t total_len = s.total_len; + uint64_t *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -494,11 +492,9 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 } if (chunk_len <= 16U - sz) { - Hacl_MAC_Poly1305_state_t s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)16U == 0ULL && total_len1 > 0ULL) { @@ -511,24 +507,14 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + 
(uint64_t)chunk_len; - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2, - .p_key = k_1 - } - ); + state->total_len = total_len2; + state->p_key = k_1; } else if (sz == 0U) { - Hacl_MAC_Poly1305_state_t s1 = *state; - uint64_t *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)16U == 0ULL && total_len1 > 0ULL) { @@ -540,7 +526,7 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 16U, buf); + poly1305_update(block_state, 16U, buf); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)16U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -556,30 +542,20 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - poly1305_update(block_state1, data1_len, data1); + poly1305_update(block_state, data1_len, data1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len, - .p_key = k_1 - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; + state->p_key = k_1; } else { uint32_t diff = 16U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_MAC_Poly1305_state_t s1 = *state; - uint64_t *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz10; if (total_len10 % (uint64_t)16U == 0ULL && total_len10 > 0ULL) { @@ -589,24 
+565,14 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 { sz10 = (uint32_t)(total_len10 % (uint64_t)16U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2, - .p_key = k_1 - } - ); - Hacl_MAC_Poly1305_state_t s10 = *state; - uint64_t *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; - uint8_t *k_10 = s10.p_key; + state->total_len = total_len2; + state->p_key = k_1; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_10 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)16U == 0ULL && total_len1 > 0ULL) { @@ -618,7 +584,7 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 16U, buf); + poly1305_update(block_state, 16U, buf0); } uint32_t ite; if @@ -635,30 +601,21 @@ Hacl_MAC_Poly1305_update(Hacl_MAC_Poly1305_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - poly1305_update(block_state1, data1_len, data1); - uint8_t *dst = buf; + poly1305_update(block_state, data1_len, data1); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff), - .p_key = k_10 - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); + state->p_key = k_10; } return Hacl_Streaming_Types_Success; } void Hacl_MAC_Poly1305_digest(Hacl_MAC_Poly1305_state_t *state, uint8_t *output) { - Hacl_MAC_Poly1305_state_t scrut = *state; - uint64_t *block_state = scrut.block_state; - uint8_t 
*buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; - uint8_t *k_ = scrut.p_key; + uint64_t *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; + uint8_t *k_ = (*state).p_key; uint32_t r; if (total_len % (uint64_t)16U == 0ULL && total_len > 0ULL) { @@ -672,6 +629,7 @@ void Hacl_MAC_Poly1305_digest(Hacl_MAC_Poly1305_state_t *state, uint8_t *output) uint64_t r1[25U] = { 0U }; uint64_t *tmp_block_state = r1; memcpy(tmp_block_state, block_state, 25U * sizeof (uint64_t)); + uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 16U == 0U && r > 0U) { @@ -682,7 +640,6 @@ void Hacl_MAC_Poly1305_digest(Hacl_MAC_Poly1305_state_t *state, uint8_t *output) ite = r % 16U; } uint8_t *buf_last = buf_1 + r - ite; - uint8_t *buf_multi = buf_1; poly1305_update(tmp_block_state, 0U, buf_multi); poly1305_update(tmp_block_state, r, buf_last); uint64_t tmp[25U] = { 0U }; diff --git a/src/Hacl_MAC_Poly1305_Simd128.c b/src/Hacl_MAC_Poly1305_Simd128.c index 17e26978..cbfda978 100644 --- a/src/Hacl_MAC_Poly1305_Simd128.c +++ b/src/Hacl_MAC_Poly1305_Simd128.c @@ -1310,6 +1310,7 @@ Hacl_MAC_Poly1305_Simd128_state_t *Hacl_MAC_Poly1305_Simd128_malloc(uint8_t *key sizeof (Lib_IntVector_Intrinsics_vec128) * 25U); memset(r1, 0U, 25U * sizeof (Lib_IntVector_Intrinsics_vec128)); Lib_IntVector_Intrinsics_vec128 *block_state = r1; + Hacl_MAC_Poly1305_Simd128_poly1305_init(block_state, key); uint8_t *k_ = (uint8_t *)KRML_HOST_CALLOC(32U, sizeof (uint8_t)); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_0 = k_; @@ -1321,22 +1322,19 @@ Hacl_MAC_Poly1305_Simd128_state_t *Hacl_MAC_Poly1305_Simd128_malloc(uint8_t *key Hacl_MAC_Poly1305_Simd128_state_t )); p[0U] = s; - Hacl_MAC_Poly1305_Simd128_poly1305_init(block_state, key); return p; } void Hacl_MAC_Poly1305_Simd128_reset(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8_t *key) { - Hacl_MAC_Poly1305_Simd128_state_t scrut = *state; - uint8_t *k_ = scrut.p_key; - uint8_t *buf = scrut.buf; - 
Lib_IntVector_Intrinsics_vec128 *block_state = scrut.block_state; + Lib_IntVector_Intrinsics_vec128 *block_state = (*state).block_state; + uint8_t *k_ = (*state).p_key; Hacl_MAC_Poly1305_Simd128_poly1305_init(block_state, key); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_1 = k_; - Hacl_MAC_Poly1305_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U, .p_key = k_1 }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; + state->p_key = k_1; } /** @@ -1349,8 +1347,8 @@ Hacl_MAC_Poly1305_Simd128_update( uint32_t chunk_len ) { - Hacl_MAC_Poly1305_Simd128_state_t s = *state; - uint64_t total_len = s.total_len; + Lib_IntVector_Intrinsics_vec128 *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -1366,11 +1364,9 @@ Hacl_MAC_Poly1305_Simd128_update( } if (chunk_len <= 32U - sz) { - Hacl_MAC_Poly1305_Simd128_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)32U == 0ULL && total_len1 > 0ULL) { @@ -1383,24 +1379,14 @@ Hacl_MAC_Poly1305_Simd128_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2, - .p_key = k_1 - } - ); + state->total_len = total_len2; + state->p_key = k_1; } else if (sz == 0U) { - Hacl_MAC_Poly1305_Simd128_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - 
uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)32U == 0ULL && total_len1 > 0ULL) { @@ -1412,7 +1398,7 @@ Hacl_MAC_Poly1305_Simd128_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 32U, buf); + poly1305_update(block_state, 32U, buf); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)32U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1428,30 +1414,20 @@ Hacl_MAC_Poly1305_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - poly1305_update(block_state1, data1_len, data1); + poly1305_update(block_state, data1_len, data1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len, - .p_key = k_1 - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; + state->p_key = k_1; } else { uint32_t diff = 32U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_MAC_Poly1305_Simd128_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz10; if (total_len10 % (uint64_t)32U == 0ULL && total_len10 > 0ULL) { @@ -1461,24 +1437,14 @@ Hacl_MAC_Poly1305_Simd128_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)32U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2, - .p_key = k_1 - } - ); - 
Hacl_MAC_Poly1305_Simd128_state_t s10 = *state; - Lib_IntVector_Intrinsics_vec128 *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; - uint8_t *k_10 = s10.p_key; + state->total_len = total_len2; + state->p_key = k_1; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_10 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)32U == 0ULL && total_len1 > 0ULL) { @@ -1490,7 +1456,7 @@ Hacl_MAC_Poly1305_Simd128_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 32U, buf); + poly1305_update(block_state, 32U, buf0); } uint32_t ite; if @@ -1507,19 +1473,11 @@ Hacl_MAC_Poly1305_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - poly1305_update(block_state1, data1_len, data1); - uint8_t *dst = buf; + poly1305_update(block_state, data1_len, data1); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd128_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff), - .p_key = k_10 - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); + state->p_key = k_10; } return Hacl_Streaming_Types_Success; } @@ -1527,11 +1485,10 @@ Hacl_MAC_Poly1305_Simd128_update( void Hacl_MAC_Poly1305_Simd128_digest(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8_t *output) { - Hacl_MAC_Poly1305_Simd128_state_t scrut = *state; - Lib_IntVector_Intrinsics_vec128 *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; - uint8_t *k_ = scrut.p_key; + Lib_IntVector_Intrinsics_vec128 *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; + uint8_t *k_ = (*state).p_key; uint32_t r; if (total_len % (uint64_t)32U == 0ULL && total_len > 0ULL) { @@ -1545,6 +1502,7 @@ 
Hacl_MAC_Poly1305_Simd128_digest(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 r1[25U] KRML_POST_ALIGN(16) = { 0U }; Lib_IntVector_Intrinsics_vec128 *tmp_block_state = r1; memcpy(tmp_block_state, block_state, 25U * sizeof (Lib_IntVector_Intrinsics_vec128)); + uint8_t *buf_multi = buf_1; uint32_t ite0; if (r % 16U == 0U && r > 0U) { @@ -1555,7 +1513,6 @@ Hacl_MAC_Poly1305_Simd128_digest(Hacl_MAC_Poly1305_Simd128_state_t *state, uint8 ite0 = r % 16U; } uint8_t *buf_last = buf_1 + r - ite0; - uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 16U == 0U && r > 0U) { diff --git a/src/Hacl_MAC_Poly1305_Simd256.c b/src/Hacl_MAC_Poly1305_Simd256.c index f25e8fff..b02880d4 100644 --- a/src/Hacl_MAC_Poly1305_Simd256.c +++ b/src/Hacl_MAC_Poly1305_Simd256.c @@ -1761,6 +1761,7 @@ Hacl_MAC_Poly1305_Simd256_state_t *Hacl_MAC_Poly1305_Simd256_malloc(uint8_t *key sizeof (Lib_IntVector_Intrinsics_vec256) * 25U); memset(r1, 0U, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); Lib_IntVector_Intrinsics_vec256 *block_state = r1; + Hacl_MAC_Poly1305_Simd256_poly1305_init(block_state, key); uint8_t *k_ = (uint8_t *)KRML_HOST_CALLOC(32U, sizeof (uint8_t)); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_0 = k_; @@ -1772,22 +1773,19 @@ Hacl_MAC_Poly1305_Simd256_state_t *Hacl_MAC_Poly1305_Simd256_malloc(uint8_t *key Hacl_MAC_Poly1305_Simd256_state_t )); p[0U] = s; - Hacl_MAC_Poly1305_Simd256_poly1305_init(block_state, key); return p; } void Hacl_MAC_Poly1305_Simd256_reset(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8_t *key) { - Hacl_MAC_Poly1305_Simd256_state_t scrut = *state; - uint8_t *k_ = scrut.p_key; - uint8_t *buf = scrut.buf; - Lib_IntVector_Intrinsics_vec256 *block_state = scrut.block_state; + Lib_IntVector_Intrinsics_vec256 *block_state = (*state).block_state; + uint8_t *k_ = (*state).p_key; Hacl_MAC_Poly1305_Simd256_poly1305_init(block_state, key); memcpy(k_, key, 32U * sizeof (uint8_t)); uint8_t *k_1 = k_; - 
Hacl_MAC_Poly1305_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)0U, .p_key = k_1 }; - state[0U] = tmp; + uint64_t total_len = (uint64_t)0U; + state->total_len = total_len; + state->p_key = k_1; } /** @@ -1800,8 +1798,8 @@ Hacl_MAC_Poly1305_Simd256_update( uint32_t chunk_len ) { - Hacl_MAC_Poly1305_Simd256_state_t s = *state; - uint64_t total_len = s.total_len; + Lib_IntVector_Intrinsics_vec256 *block_state = (*state).block_state; + uint64_t total_len = (*state).total_len; if ((uint64_t)chunk_len > 0xffffffffULL - total_len) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -1817,11 +1815,9 @@ Hacl_MAC_Poly1305_Simd256_update( } if (chunk_len <= 64U - sz) { - Hacl_MAC_Poly1305_Simd256_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1834,24 +1830,14 @@ Hacl_MAC_Poly1305_Simd256_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len2, - .p_key = k_1 - } - ); + state->total_len = total_len2; + state->p_key = k_1; } else if (sz == 0U) { - Hacl_MAC_Poly1305_Simd256_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state1 = s1.block_state; - uint8_t *buf = s1.buf; - uint64_t total_len1 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len1 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1863,7 +1849,7 @@ Hacl_MAC_Poly1305_Simd256_update( } if (!(sz1 == 0U)) { - 
poly1305_update(block_state1, 64U, buf); + poly1305_update(block_state, 64U, buf); } uint32_t ite; if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) @@ -1879,30 +1865,20 @@ Hacl_MAC_Poly1305_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - poly1305_update(block_state1, data1_len, data1); + poly1305_update(block_state, data1_len, data1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)chunk_len, - .p_key = k_1 - } - ); + state->total_len = total_len1 + (uint64_t)chunk_len; + state->p_key = k_1; } else { uint32_t diff = 64U - sz; uint8_t *chunk1 = chunk; uint8_t *chunk2 = chunk + diff; - Hacl_MAC_Poly1305_Simd256_state_t s1 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state10 = s1.block_state; - uint8_t *buf0 = s1.buf; - uint64_t total_len10 = s1.total_len; - uint8_t *k_1 = s1.p_key; + uint8_t *buf = (*state).buf; + uint64_t total_len10 = (*state).total_len; + uint8_t *k_1 = (*state).p_key; uint32_t sz10; if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) { @@ -1912,24 +1888,14 @@ Hacl_MAC_Poly1305_Simd256_update( { sz10 = (uint32_t)(total_len10 % (uint64_t)64U); } - uint8_t *buf2 = buf0 + sz10; + uint8_t *buf2 = buf + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state10, - .buf = buf0, - .total_len = total_len2, - .p_key = k_1 - } - ); - Hacl_MAC_Poly1305_Simd256_state_t s10 = *state; - Lib_IntVector_Intrinsics_vec256 *block_state1 = s10.block_state; - uint8_t *buf = s10.buf; - uint64_t total_len1 = s10.total_len; - uint8_t *k_10 = s10.p_key; + state->total_len = total_len2; + state->p_key = k_1; + uint8_t *buf0 = (*state).buf; + uint64_t total_len1 = 
(*state).total_len; + uint8_t *k_10 = (*state).p_key; uint32_t sz1; if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) { @@ -1941,7 +1907,7 @@ Hacl_MAC_Poly1305_Simd256_update( } if (!(sz1 == 0U)) { - poly1305_update(block_state1, 64U, buf); + poly1305_update(block_state, 64U, buf0); } uint32_t ite; if @@ -1958,19 +1924,11 @@ Hacl_MAC_Poly1305_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - poly1305_update(block_state1, data1_len, data1); - uint8_t *dst = buf; + poly1305_update(block_state, data1_len, data1); + uint8_t *dst = buf0; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = - ( - (Hacl_MAC_Poly1305_Simd256_state_t){ - .block_state = block_state1, - .buf = buf, - .total_len = total_len1 + (uint64_t)(chunk_len - diff), - .p_key = k_10 - } - ); + state->total_len = total_len1 + (uint64_t)(chunk_len - diff); + state->p_key = k_10; } return Hacl_Streaming_Types_Success; } @@ -1978,11 +1936,10 @@ Hacl_MAC_Poly1305_Simd256_update( void Hacl_MAC_Poly1305_Simd256_digest(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8_t *output) { - Hacl_MAC_Poly1305_Simd256_state_t scrut = *state; - Lib_IntVector_Intrinsics_vec256 *block_state = scrut.block_state; - uint8_t *buf_ = scrut.buf; - uint64_t total_len = scrut.total_len; - uint8_t *k_ = scrut.p_key; + Lib_IntVector_Intrinsics_vec256 *block_state = (*state).block_state; + uint8_t *buf_ = (*state).buf; + uint64_t total_len = (*state).total_len; + uint8_t *k_ = (*state).p_key; uint32_t r; if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) { @@ -1996,6 +1953,7 @@ Hacl_MAC_Poly1305_Simd256_digest(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8 KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 r1[25U] KRML_POST_ALIGN(32) = { 0U }; Lib_IntVector_Intrinsics_vec256 *tmp_block_state = r1; memcpy(tmp_block_state, block_state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + uint8_t *buf_multi = buf_1; uint32_t ite0; if 
(r % 16U == 0U && r > 0U) { @@ -2006,7 +1964,6 @@ Hacl_MAC_Poly1305_Simd256_digest(Hacl_MAC_Poly1305_Simd256_state_t *state, uint8 ite0 = r % 16U; } uint8_t *buf_last = buf_1 + r - ite0; - uint8_t *buf_multi = buf_1; uint32_t ite; if (r % 16U == 0U && r > 0U) { diff --git a/src/Hacl_NaCl.c b/src/Hacl_NaCl.c index a1bbd25c..54cf0171 100644 --- a/src/Hacl_NaCl.c +++ b/src/Hacl_NaCl.c @@ -62,8 +62,8 @@ secretbox_detached(uint32_t mlen, uint8_t *c, uint8_t *tag, uint8_t *k, uint8_t memcpy(block0, m0, mlen0 * sizeof (uint8_t)); for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = block0; uint8_t x = (uint32_t)block0[i] ^ (uint32_t)ekey0[i]; + uint8_t *os = block0; os[i] = x; } uint8_t *c0 = c; @@ -117,8 +117,8 @@ secretbox_open_detached( memcpy(block0, c0, mlen0 * sizeof (uint8_t)); for (uint32_t i = 0U; i < 32U; i++) { - uint8_t *os = block0; uint8_t x = (uint32_t)block0[i] ^ (uint32_t)ekey0[i]; + uint8_t *os = block0; os[i] = x; } uint8_t *m0 = m; diff --git a/src/Hacl_P256.c b/src/Hacl_P256.c index 609fed81..c1db5d68 100644 --- a/src/Hacl_P256.c +++ b/src/Hacl_P256.c @@ -77,9 +77,9 @@ static inline void bn_cmovznz4(uint64_t *res, uint64_t cin, uint64_t *x, uint64_ 0U, 4U, 1U, - uint64_t *os = res; uint64_t uu____0 = x[i]; uint64_t x1 = uu____0 ^ (mask & (y[i] ^ uu____0)); + uint64_t *os = res; os[i] = x1;); } @@ -131,8 +131,8 @@ static inline void bn_add_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x1;); } @@ -210,8 +210,8 @@ static inline void bn_sub_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & tmp[i]) | (~c2 & res[i]); + uint64_t *os = res; os[i] = x1;); } @@ -250,8 +250,8 @@ static inline void bn_sqr4(uint64_t *res, uint64_t *x) 0U, 4U, 1U, - uint64_t *ab = x; uint64_t a_j = x[i0]; + uint64_t *ab = x; uint64_t *res_j = res + i0; uint64_t c = 0ULL; for (uint32_t i = 0U; i < i0 / 4U; 
i++) @@ -277,7 +277,12 @@ static inline void bn_sqr4(uint64_t *res, uint64_t *x) } uint64_t r = c; res[i0 + i0] = r;); - uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, res, res); + uint64_t a_copy0[8U] = { 0U }; + uint64_t b_copy0[8U] = { 0U }; + memcpy(a_copy0, res, 8U * sizeof (uint64_t)); + memcpy(b_copy0, res, 8U * sizeof (uint64_t)); + uint64_t r = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy0, b_copy0, res); + uint64_t c0 = r; KRML_MAYBE_UNUSED_VAR(c0); uint64_t tmp[8U] = { 0U }; KRML_MAYBE_FOR4(i, @@ -289,7 +294,12 @@ static inline void bn_sqr4(uint64_t *res, uint64_t *x) uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); tmp[2U * i] = lo; tmp[2U * i + 1U] = hi;); - uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, res, tmp, res); + uint64_t a_copy[8U] = { 0U }; + uint64_t b_copy[8U] = { 0U }; + memcpy(a_copy, res, 8U * sizeof (uint64_t)); + memcpy(b_copy, tmp, 8U * sizeof (uint64_t)); + uint64_t r0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(8U, a_copy, b_copy, res); + uint64_t c1 = r0; KRML_MAYBE_UNUSED_VAR(c1); } @@ -306,9 +316,9 @@ static inline void bn_from_bytes_be4(uint64_t *res, uint8_t *b) 0U, 4U, 1U, - uint64_t *os = res; uint64_t u = load64_be(b + (4U - i - 1U) * 8U); uint64_t x = u; + uint64_t *os = res; os[i] = x;); } @@ -394,8 +404,11 @@ static inline uint64_t bn_is_lt_prime_mask4(uint64_t *f) { uint64_t tmp[4U] = { 0U }; make_prime(tmp); - uint64_t c = bn_sub4(tmp, f, tmp); - return 0ULL - c; + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, tmp, 4U * sizeof (uint64_t)); + uint64_t c = bn_sub4(tmp, f, y_copy); + uint64_t c0 = c; + return 0ULL - c0; } static inline uint64_t feq_mask(uint64_t *a, uint64_t *b) @@ -423,7 +436,9 @@ static inline void fnegate_conditional_vartime(uint64_t *f, bool is_negate) uint64_t zero[4U] = { 0U }; if (is_negate) { - fsub0(f, zero, f); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, f, 4U * sizeof (uint64_t)); + fsub0(f, zero, y_copy); } } @@ -455,8 +470,8 @@ static inline void 
mont_reduction(uint64_t *res, uint64_t *x) } uint64_t r = c; uint64_t c1 = r; - uint64_t *resb = x + 4U + i0; uint64_t res_j = x[4U + i0]; + uint64_t *resb = x + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); memcpy(res, x + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = c0; @@ -486,8 +501,8 @@ static inline void mont_reduction(uint64_t *res, uint64_t *x) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x1;); } @@ -529,7 +544,9 @@ static inline void fmul_by_b_coeff(uint64_t *res, uint64_t *x) static inline void fcube(uint64_t *res, uint64_t *x) { fsqr0(res, x); - fmul0(res, res, x); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, res, 4U * sizeof (uint64_t)); + fmul0(res, x_copy, x); } static inline void finv(uint64_t *res, uint64_t *a) @@ -541,51 +558,121 @@ static inline void finv(uint64_t *res, uint64_t *a) uint64_t *tmp2 = tmp + 12U; memcpy(x2, a, 4U * sizeof (uint64_t)); { - fsqr0(x2, x2); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy); } - fmul0(x2, x2, a); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy, a); memcpy(x30, x2, 4U * sizeof (uint64_t)); { - fsqr0(x30, x30); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, x30, 4U * sizeof (uint64_t)); + fsqr0(x30, x_copy0); } - fmul0(x30, x30, a); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, x30, 4U * sizeof (uint64_t)); + fmul0(x30, x_copy0, a); memcpy(tmp1, x30, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, x30); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy1);); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy1, x30); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, fsqr0(tmp2, tmp2);); - 
fmul0(tmp2, tmp2, tmp1); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy2);); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy2, tmp1); memcpy(tmp1, tmp2, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, x30); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy3);); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy3, x30); memcpy(x30, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR15(i, 0U, 15U, 1U, fsqr0(x30, x30);); - fmul0(x30, x30, tmp1); + KRML_MAYBE_FOR15(i, + 0U, + 15U, + 1U, + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x30, 4U * sizeof (uint64_t)); + fsqr0(x30, x_copy4);); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x30, 4U * sizeof (uint64_t)); + fmul0(x30, x_copy4, tmp1); memcpy(tmp1, x30, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, x2); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy5);); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy5, x2); memcpy(x2, tmp1, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < 32U; i++) { - fsqr0(x2, x2); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy6); } - fmul0(x2, x2, a); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy6, a); for (uint32_t i = 0U; i < 128U; i++) { - fsqr0(x2, x2); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy7); } - fmul0(x2, x2, tmp1); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, x2, 4U * sizeof 
(uint64_t)); + fmul0(x2, x_copy7, tmp1); for (uint32_t i = 0U; i < 32U; i++) { - fsqr0(x2, x2); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy8); } - fmul0(x2, x2, tmp1); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy8, tmp1); for (uint32_t i = 0U; i < 30U; i++) { - fsqr0(x2, x2); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy9); } - fmul0(x2, x2, x30); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, fsqr0(x2, x2);); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, x2, 4U * sizeof (uint64_t)); + fmul0(x2, x_copy9, x30); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, x2, 4U * sizeof (uint64_t)); + fsqr0(x2, x_copy10);); fmul0(tmp1, x2, a); memcpy(res, tmp1, 4U * sizeof (uint64_t)); } @@ -597,35 +684,81 @@ static inline void fsqrt(uint64_t *res, uint64_t *a) uint64_t *tmp2 = tmp + 4U; memcpy(tmp1, a, 4U * sizeof (uint64_t)); { - fsqr0(tmp1, tmp1); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy); } - fmul0(tmp1, tmp1, a); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy, a); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, fsqr0(tmp2, tmp2);); - fmul0(tmp2, tmp2, tmp1); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy0);); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy0, tmp1); memcpy(tmp1, tmp2, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, tmp2); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy1);); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, tmp1, 4U * 
sizeof (uint64_t)); + fmul0(tmp1, x_copy1, tmp2); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR8(i, 0U, 8U, 1U, fsqr0(tmp2, tmp2);); - fmul0(tmp2, tmp2, tmp1); + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy2);); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy2, tmp1); memcpy(tmp1, tmp2, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR16(i, 0U, 16U, 1U, fsqr0(tmp1, tmp1);); - fmul0(tmp1, tmp1, tmp2); + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fsqr0(tmp1, x_copy3);); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + fmul0(tmp1, x_copy3, tmp2); memcpy(tmp2, tmp1, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < 32U; i++) { - fsqr0(tmp2, tmp2); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy4); } - fmul0(tmp2, tmp2, a); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy4, a); for (uint32_t i = 0U; i < 96U; i++) { - fsqr0(tmp2, tmp2); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy5); } - fmul0(tmp2, tmp2, a); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp2, 4U * sizeof (uint64_t)); + fmul0(tmp2, x_copy5, a); for (uint32_t i = 0U; i < 94U; i++) { - fsqr0(tmp2, tmp2); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, tmp2, 4U * sizeof (uint64_t)); + fsqr0(tmp2, x_copy6); } memcpy(res, tmp2, 4U * sizeof (uint64_t)); } @@ -667,8 +800,12 @@ static inline void to_aff_point(uint64_t *res, uint64_t *p) finv(zinv, pz); fmul0(x, px, zinv); fmul0(y, py, zinv); - from_mont(x, x); - from_mont(y, y); + uint64_t a_copy[4U] = { 0U }; + memcpy(a_copy, x, 4U * sizeof (uint64_t)); + from_mont(x, a_copy); + uint64_t a_copy0[4U] = { 0U }; + 
memcpy(a_copy0, y, 4U * sizeof (uint64_t)); + from_mont(y, a_copy0); } static inline void to_aff_point_x(uint64_t *res, uint64_t *p) @@ -678,7 +815,9 @@ static inline void to_aff_point_x(uint64_t *res, uint64_t *p) uint64_t *pz = p + 8U; finv(zinv, pz); fmul0(res, px, zinv); - from_mont(res, res); + uint64_t a_copy[4U] = { 0U }; + memcpy(a_copy, res, 4U * sizeof (uint64_t)); + from_mont(res, a_copy); } static inline void to_proj_point(uint64_t *res, uint64_t *p) @@ -705,11 +844,19 @@ static inline bool is_on_curve_vartime(uint64_t *p) uint64_t tmp[4U] = { 0U }; fcube(rp, tx); make_a_coeff(tmp); - fmul0(tmp, tmp, tx); - fadd0(rp, tmp, rp); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp, 4U * sizeof (uint64_t)); + fmul0(tmp, x_copy, tx); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, rp, 4U * sizeof (uint64_t)); + fadd0(rp, tmp, y_copy); make_b_coeff(tmp); - fadd0(rp, tmp, rp); - fsqr0(ty, ty); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, rp, 4U * sizeof (uint64_t)); + fadd0(rp, tmp, y_copy0); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, ty, 4U * sizeof (uint64_t)); + fsqr0(ty, x_copy0); uint64_t r = feq_mask(ty, rp); bool r0 = r == 0xFFFFFFFFFFFFFFFFULL; return r0; @@ -785,13 +932,21 @@ static inline bool aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_ uint64_t tmp[4U] = { 0U }; fcube(y2M, xM); make_a_coeff(tmp); - fmul0(tmp, tmp, xM); - fadd0(y2M, tmp, y2M); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, tmp, 4U * sizeof (uint64_t)); + fmul0(tmp, x_copy, xM); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, y2M, 4U * sizeof (uint64_t)); + fadd0(y2M, tmp, y_copy); make_b_coeff(tmp); - fadd0(y2M, tmp, y2M); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, y2M, 4U * sizeof (uint64_t)); + fadd0(y2M, tmp, y_copy0); fsqrt(yM, y2M); from_mont(y, yM); - fsqr0(yM, yM); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, yM, 4U * sizeof (uint64_t)); + fsqr0(yM, x_copy0); uint64_t r = feq_mask(yM, y2M); bool is_y_valid = r == 
0xFFFFFFFFFFFFFFFFULL; bool is_y_valid0 = is_y_valid; @@ -808,8 +963,6 @@ static inline bool aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_ static inline void point_double(uint64_t *res, uint64_t *p) { uint64_t tmp[20U] = { 0U }; - uint64_t *x = p; - uint64_t *z = p + 8U; uint64_t *x3 = res; uint64_t *y3 = res + 4U; uint64_t *z3 = res + 8U; @@ -818,43 +971,85 @@ static inline void point_double(uint64_t *res, uint64_t *p) uint64_t *t2 = tmp + 8U; uint64_t *t3 = tmp + 12U; uint64_t *t4 = tmp + 16U; - uint64_t *x1 = p; + uint64_t *x = p; uint64_t *y = p + 4U; - uint64_t *z1 = p + 8U; - fsqr0(t0, x1); + uint64_t *z0 = p + 8U; + fsqr0(t0, x); fsqr0(t1, y); - fsqr0(t2, z1); - fmul0(t3, x1, y); - fadd0(t3, t3, t3); - fmul0(t4, y, z1); - fmul0(z3, x, z); - fadd0(z3, z3, z3); + fsqr0(t2, z0); + fmul0(t3, x, y); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, t3, 4U * sizeof (uint64_t)); + fadd0(t3, x_copy, x_copy); + fmul0(t4, y, z0); + uint64_t *x0 = p; + uint64_t *z = p + 8U; + fmul0(z3, x0, z); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy0, x_copy0); fmul_by_b_coeff(y3, t2); - fsub0(y3, y3, z3); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x_copy1, z3); fadd0(x3, y3, y3); - fadd0(y3, x3, y3); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, y3, 4U * sizeof (uint64_t)); + fadd0(y3, x3, y_copy); fsub0(x3, t1, y3); - fadd0(y3, t1, y3); - fmul0(y3, x3, y3); - fmul0(x3, x3, t3); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, y3, 4U * sizeof (uint64_t)); + fadd0(y3, t1, y_copy0); + uint64_t y_copy1[4U] = { 0U }; + memcpy(y_copy1, y3, 4U * sizeof (uint64_t)); + fmul0(y3, x3, y_copy1); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, x3, 4U * sizeof (uint64_t)); + fmul0(x3, x_copy2, t3); fadd0(t3, t2, t2); - fadd0(t2, t2, t3); - fmul_by_b_coeff(z3, z3); - fsub0(z3, z3, t2); - fsub0(z3, z3, t0); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, t2, 4U * 
sizeof (uint64_t)); + fadd0(t2, x_copy3, t3); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, z3, 4U * sizeof (uint64_t)); + fmul_by_b_coeff(z3, x_copy4); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, z3, 4U * sizeof (uint64_t)); + fsub0(z3, x_copy5, t2); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, z3, 4U * sizeof (uint64_t)); + fsub0(z3, x_copy6, t0); fadd0(t3, z3, z3); - fadd0(z3, z3, t3); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy7, t3); fadd0(t3, t0, t0); - fadd0(t0, t3, t0); - fsub0(t0, t0, t2); - fmul0(t0, t0, z3); - fadd0(y3, y3, t0); + uint64_t y_copy2[4U] = { 0U }; + memcpy(y_copy2, t0, 4U * sizeof (uint64_t)); + fadd0(t0, t3, y_copy2); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, t0, 4U * sizeof (uint64_t)); + fsub0(t0, x_copy8, t2); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, t0, 4U * sizeof (uint64_t)); + fmul0(t0, x_copy9, z3); + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, y3, 4U * sizeof (uint64_t)); + fadd0(y3, x_copy10, t0); fadd0(t0, t4, t4); - fmul0(z3, t0, z3); - fsub0(x3, x3, z3); + uint64_t y_copy3[4U] = { 0U }; + memcpy(y_copy3, z3, 4U * sizeof (uint64_t)); + fmul0(z3, t0, y_copy3); + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, x3, 4U * sizeof (uint64_t)); + fsub0(x3, x_copy11, z3); fmul0(z3, t0, t1); - fadd0(z3, z3, z3); - fadd0(z3, z3, z3); + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy12, x_copy12); + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy13, x_copy13); } static inline void point_add(uint64_t *res, uint64_t *p, uint64_t *q) @@ -882,52 +1077,92 @@ static inline void point_add(uint64_t *res, uint64_t *p, uint64_t *q) fmul0(t2, z10, z20); fadd0(t3, x1, y1); fadd0(t4, x20, y20); - fmul0(t3, t3, t4); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, t3, 4U * sizeof (uint64_t)); + fmul0(t3, x_copy0, t4); fadd0(t4, t01, t11); uint64_t 
*y10 = p + 4U; uint64_t *z11 = p + 8U; uint64_t *y2 = q + 4U; uint64_t *z21 = q + 8U; - fsub0(t3, t3, t4); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, t3, 4U * sizeof (uint64_t)); + fsub0(t3, x_copy, t4); fadd0(t4, y10, z11); fadd0(t5, y2, z21); - fmul0(t4, t4, t5); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, t4, 4U * sizeof (uint64_t)); + fmul0(t4, x_copy1, t5); fadd0(t5, t11, t2); - fsub0(t4, t4, t5); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, t4, 4U * sizeof (uint64_t)); + fsub0(t4, x_copy2, t5); uint64_t *x10 = p; uint64_t *z1 = p + 8U; uint64_t *x2 = q; uint64_t *z2 = q + 8U; fadd0(x3, x10, z1); fadd0(y3, x2, z2); - fmul0(x3, x3, y3); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, x3, 4U * sizeof (uint64_t)); + fmul0(x3, x_copy3, y3); fadd0(y3, t01, t2); - fsub0(y3, x3, y3); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x3, y_copy); fmul_by_b_coeff(z3, t2); fsub0(x3, y3, z3); fadd0(z3, x3, x3); - fadd0(x3, x3, z3); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, x3, 4U * sizeof (uint64_t)); + fadd0(x3, x_copy4, z3); fsub0(z3, t11, x3); - fadd0(x3, t11, x3); - fmul_by_b_coeff(y3, y3); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, x3, 4U * sizeof (uint64_t)); + fadd0(x3, t11, y_copy0); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, y3, 4U * sizeof (uint64_t)); + fmul_by_b_coeff(y3, x_copy5); fadd0(t11, t2, t2); - fadd0(t2, t11, t2); - fsub0(y3, y3, t2); - fsub0(y3, y3, t01); + uint64_t y_copy1[4U] = { 0U }; + memcpy(y_copy1, t2, 4U * sizeof (uint64_t)); + fadd0(t2, t11, y_copy1); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x_copy6, t2); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, y3, 4U * sizeof (uint64_t)); + fsub0(y3, x_copy7, t01); fadd0(t11, y3, y3); - fadd0(y3, t11, y3); + uint64_t y_copy2[4U] = { 0U }; + memcpy(y_copy2, y3, 4U * sizeof (uint64_t)); + fadd0(y3, t11, y_copy2); fadd0(t11, t01, t01); - fadd0(t01, t11, t01); - 
fsub0(t01, t01, t2); + uint64_t y_copy3[4U] = { 0U }; + memcpy(y_copy3, t01, 4U * sizeof (uint64_t)); + fadd0(t01, t11, y_copy3); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, t01, 4U * sizeof (uint64_t)); + fsub0(t01, x_copy8, t2); fmul0(t11, t4, y3); fmul0(t2, t01, y3); fmul0(y3, x3, z3); - fadd0(y3, y3, t2); - fmul0(x3, t3, x3); - fsub0(x3, x3, t11); - fmul0(z3, t4, z3); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, y3, 4U * sizeof (uint64_t)); + fadd0(y3, x_copy9, t2); + uint64_t y_copy4[4U] = { 0U }; + memcpy(y_copy4, x3, 4U * sizeof (uint64_t)); + fmul0(x3, t3, y_copy4); + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, x3, 4U * sizeof (uint64_t)); + fsub0(x3, x_copy10, t11); + uint64_t y_copy5[4U] = { 0U }; + memcpy(y_copy5, z3, 4U * sizeof (uint64_t)); + fmul0(z3, t4, y_copy5); fmul0(t11, t3, t01); - fadd0(z3, z3, t11); + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, z3, 4U * sizeof (uint64_t)); + fadd0(z3, x_copy11, t11); memcpy(res, t1, 12U * sizeof (uint64_t)); } @@ -939,23 +1174,35 @@ static inline void point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) uint64_t *t1 = table + 12U; make_point_at_inf(t0); memcpy(t1, p, 12U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table); KRML_MAYBE_FOR7(i, 0U, 7U, 1U, uint64_t *t11 = table + (i + 1U) * 12U; - point_double(tmp, t11); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, t11, 12U * sizeof (uint64_t)); + point_double(tmp, p_copy0); memcpy(table + (2U * i + 2U) * 12U, tmp, 12U * sizeof (uint64_t)); uint64_t *t2 = table + (2U * i + 2U) * 12U; - point_add(tmp, p, t2); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, p, 12U * sizeof (uint64_t)); + point_add(tmp, p_copy, t2); memcpy(table + (2U * i + 3U) * 12U, tmp, 12U * sizeof (uint64_t));); make_point_at_inf(res); uint64_t tmp0[12U] = { 0U }; for (uint32_t i0 = 0U; i0 < 64U; i0++) { - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, point_double(res, res);); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 
12U * sizeof (uint64_t)); + point_double(res, p_copy);); uint32_t k = 256U - 4U * i0 - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar, k, 4U); + KRML_MAYBE_UNUSED_VAR(table); memcpy(tmp0, (uint64_t *)table, 12U * sizeof (uint64_t)); KRML_MAYBE_FOR15(i1, 0U, @@ -967,10 +1214,12 @@ static inline void point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) 0U, 12U, 1U, - uint64_t *os = tmp0; uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); + uint64_t *os = tmp0; os[i] = x;);); - point_add(res, res, tmp0); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy, tmp0); } } @@ -987,8 +1236,8 @@ static inline void precomp_get_consttime(const uint64_t *table, uint64_t bits_l, 0U, 12U, 1U, - uint64_t *os = tmp; uint64_t x = (c & res_j[i]) | (~c & tmp[i]); + uint64_t *os = tmp; os[i] = x;);); } @@ -1030,23 +1279,41 @@ static inline void point_mul_g(uint64_t *res, uint64_t *scalar) 0U, 16U, 1U, - KRML_MAYBE_FOR4(i0, 0U, 4U, 1U, point_double(res, res);); + KRML_MAYBE_FOR4(i0, + 0U, + 4U, + 1U, + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_double(res, p_copy);); uint32_t k = 64U - 4U * i - 4U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r4, k, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); - point_add(res, res, tmp); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy, tmp); uint32_t k0 = 64U - 4U * i - 4U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r3, k0, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); - point_add(res, res, tmp); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy0, tmp); uint32_t k1 = 64U - 4U 
* i - 4U; uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r2, k1, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); - point_add(res, res, tmp); + uint64_t p_copy1[12U] = { 0U }; + memcpy(p_copy1, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy1, tmp); uint32_t k2 = 64U - 4U * i - 4U; uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64(1U, r1, k2, 4U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_basepoint_table_w4); precomp_get_consttime(Hacl_P256_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); - point_add(res, res, tmp);); + uint64_t p_copy2[12U] = { 0U }; + memcpy(p_copy2, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy2, tmp);); KRML_MAYBE_UNUSED_VAR(q1); KRML_MAYBE_UNUSED_VAR(q2); KRML_MAYBE_UNUSED_VAR(q3); @@ -1064,15 +1331,20 @@ point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t uint64_t *t1 = table2 + 12U; make_point_at_inf(t0); memcpy(t1, q2, 12U * sizeof (uint64_t)); + KRML_MAYBE_UNUSED_VAR(table2); KRML_MAYBE_FOR15(i, 0U, 15U, 1U, uint64_t *t11 = table2 + (i + 1U) * 12U; - point_double(tmp, t11); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, t11, 12U * sizeof (uint64_t)); + point_double(tmp, p_copy0); memcpy(table2 + (2U * i + 2U) * 12U, tmp, 12U * sizeof (uint64_t)); uint64_t *t2 = table2 + (2U * i + 2U) * 12U; - point_add(tmp, q2, t2); + uint64_t p_copy[12U] = { 0U }; + memcpy(p_copy, q2, 12U * sizeof (uint64_t)); + point_add(tmp, p_copy, t2); memcpy(table2 + (2U * i + 3U) * 12U, tmp, 12U * sizeof (uint64_t));); uint64_t tmp0[12U] = { 0U }; uint32_t i0 = 255U; @@ -1085,25 +1357,39 @@ point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t uint32_t bits_l320 = (uint32_t)bits_c0; const uint64_t *a_bits_l0 = table2 + bits_l320 * 12U; memcpy(tmp0, (uint64_t *)a_bits_l0, 12U * sizeof (uint64_t)); - point_add(res, res, tmp0); + uint64_t p_copy[12U] = { 0U 
}; + memcpy(p_copy, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy, tmp0); uint64_t tmp1[12U] = { 0U }; for (uint32_t i = 0U; i < 51U; i++) { - KRML_MAYBE_FOR5(i2, 0U, 5U, 1U, point_double(res, res);); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, res, 12U * sizeof (uint64_t)); + point_double(res, p_copy0);); uint32_t k = 255U - 5U * i - 5U; uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar2, k, 5U); + KRML_MAYBE_UNUSED_VAR(table2); uint32_t bits_l321 = (uint32_t)bits_l; const uint64_t *a_bits_l1 = table2 + bits_l321 * 12U; memcpy(tmp1, (uint64_t *)a_bits_l1, 12U * sizeof (uint64_t)); - point_add(res, res, tmp1); + uint64_t p_copy0[12U] = { 0U }; + memcpy(p_copy0, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy0, tmp1); uint32_t k0 = 255U - 5U * i - 5U; uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64(4U, scalar1, k0, 5U); + KRML_HOST_IGNORE(Hacl_P256_PrecompTable_precomp_basepoint_table_w5); uint32_t bits_l322 = (uint32_t)bits_l0; const uint64_t *a_bits_l2 = Hacl_P256_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * 12U; memcpy(tmp1, (uint64_t *)a_bits_l2, 12U * sizeof (uint64_t)); - point_add(res, res, tmp1); + uint64_t p_copy1[12U] = { 0U }; + memcpy(p_copy1, res, 12U * sizeof (uint64_t)); + point_add(res, p_copy1, tmp1); } } @@ -1111,8 +1397,11 @@ static inline uint64_t bn_is_lt_order_mask4(uint64_t *f) { uint64_t tmp[4U] = { 0U }; make_order(tmp); - uint64_t c = bn_sub4(tmp, f, tmp); - return 0ULL - c; + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, tmp, 4U * sizeof (uint64_t)); + uint64_t c = bn_sub4(tmp, f, y_copy); + uint64_t c0 = c; + return 0ULL - c0; } static inline uint64_t bn_is_lt_order_and_gt_zero_mask4(uint64_t *f) @@ -1126,8 +1415,11 @@ static inline void qmod_short(uint64_t *res, uint64_t *x) { uint64_t tmp[4U] = { 0U }; make_order(tmp); - uint64_t c = bn_sub4(tmp, x, tmp); - bn_cmovznz4(res, c, tmp, x); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, tmp, 4U * sizeof 
(uint64_t)); + uint64_t c = bn_sub4(tmp, x, y_copy); + uint64_t c0 = c; + bn_cmovznz4(res, c0, tmp, x); } static inline void qadd(uint64_t *res, uint64_t *x, uint64_t *y) @@ -1165,8 +1457,8 @@ static inline void qmont_reduction(uint64_t *res, uint64_t *x) } uint64_t r = c; uint64_t c1 = r; - uint64_t *resb = x + 4U + i0; uint64_t res_j = x[4U + i0]; + uint64_t *resb = x + 4U + i0; c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); memcpy(res, x + 4U, 4U * sizeof (uint64_t)); uint64_t c00 = c0; @@ -1196,8 +1488,8 @@ static inline void qmont_reduction(uint64_t *res, uint64_t *x) 0U, 4U, 1U, - uint64_t *os = res; uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); + uint64_t *os = res; os[i] = x1;); } @@ -1238,9 +1530,9 @@ bool Hacl_Impl_P256_DH_ecp256dh_i(uint8_t *public_key, uint8_t *private_key) 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; point_mul_g(pk, sk); @@ -1270,9 +1562,9 @@ Hacl_Impl_P256_DH_ecp256dh_r( 0U, 4U, 1U, - uint64_t *os = sk; uint64_t uu____0 = oneq[i]; uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); + uint64_t *os = sk; os[i] = x;); uint64_t is_sk_valid = is_b_valid; uint64_t ss_proj[12U] = { 0U }; @@ -1296,98 +1588,348 @@ static inline void qinv(uint64_t *res, uint64_t *r) uint64_t *x_101111 = tmp + 24U; memcpy(x6, r, 4U * sizeof (uint64_t)); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_11, x6, r); qmul(x_101, x6, x_11); qmul(x_111, x6, x_101); memcpy(x6, x_101, 4U * sizeof (uint64_t)); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_1111, x_101, x6); { - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_10101, x6, r); memcpy(x6, x_10101, 4U * sizeof (uint64_t)); 
{ - qsqr(x6, x6); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy); } qmul(x_101111, x_101, x6); - qmul(x6, x_10101, x6); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, x6, 4U * sizeof (uint64_t)); + qmul(x6, x_10101, y_copy); uint64_t tmp1[4U] = { 0U }; - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, qsqr(x6, x6);); - qmul(x6, x6, x_11); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy);); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x6, 4U * sizeof (uint64_t)); + qmul(x6, x_copy, x_11); memcpy(tmp1, x6, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR8(i, 0U, 8U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x6); + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy0);); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy0, x6); memcpy(x6, tmp1, 4U * sizeof (uint64_t)); - KRML_MAYBE_FOR16(i, 0U, 16U, 1U, qsqr(x6, x6);); - qmul(x6, x6, tmp1); + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, x6, 4U * sizeof (uint64_t)); + qsqr(x6, x_copy1);); + uint64_t x_copy1[4U] = { 0U }; + memcpy(x_copy1, x6, 4U * sizeof (uint64_t)); + qmul(x6, x_copy1, tmp1); memcpy(tmp1, x6, 4U * sizeof (uint64_t)); for (uint32_t i = 0U; i < 64U; i++) { - qsqr(tmp1, tmp1); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy2); } - qmul(tmp1, tmp1, x6); + uint64_t x_copy2[4U] = { 0U }; + memcpy(x_copy2, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy2, x6); for (uint32_t i = 0U; i < 32U; i++) { - qsqr(tmp1, tmp1); + uint64_t x_copy3[4U] = { 0U }; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy3); } - qmul(tmp1, tmp1, x6); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101111); - KRML_MAYBE_FOR5(i, 0U, 5U, 
1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_10101); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR9(i, 0U, 9U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101111); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, r); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, r); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR4(i, 0U, 4U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_111); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR10(i, 0U, 10U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_101111); - KRML_MAYBE_FOR2(i, 0U, 2U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_11); - KRML_MAYBE_FOR3(i, 0U, 3U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, r); - KRML_MAYBE_FOR7(i, 0U, 7U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_10101); - KRML_MAYBE_FOR6(i, 0U, 6U, 1U, qsqr(tmp1, tmp1);); - qmul(tmp1, tmp1, x_1111); + uint64_t x_copy3[4U] = { 0U 
}; + memcpy(x_copy3, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy3, x6); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy4);); + uint64_t x_copy4[4U] = { 0U }; + memcpy(x_copy4, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy4, x_101111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy5);); + uint64_t x_copy5[4U] = { 0U }; + memcpy(x_copy5, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy5, x_111); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy6);); + uint64_t x_copy6[4U] = { 0U }; + memcpy(x_copy6, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy6, x_11); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy7);); + uint64_t x_copy7[4U] = { 0U }; + memcpy(x_copy7, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy7, x_1111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy8);); + uint64_t x_copy8[4U] = { 0U }; + memcpy(x_copy8, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy8, x_10101); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy9);); + uint64_t x_copy9[4U] = { 0U }; + memcpy(x_copy9, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy9, x_101); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy10);); + uint64_t x_copy10[4U] = { 0U }; + memcpy(x_copy10, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy10, x_101); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, tmp1, 4U * 
sizeof (uint64_t)); + qsqr(tmp1, x_copy11);); + uint64_t x_copy11[4U] = { 0U }; + memcpy(x_copy11, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy11, x_101); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy12);); + uint64_t x_copy12[4U] = { 0U }; + memcpy(x_copy12, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy12, x_111); + KRML_MAYBE_FOR9(i, + 0U, + 9U, + 1U, + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy13);); + uint64_t x_copy13[4U] = { 0U }; + memcpy(x_copy13, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy13, x_101111); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy14[4U] = { 0U }; + memcpy(x_copy14, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy14);); + uint64_t x_copy14[4U] = { 0U }; + memcpy(x_copy14, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy14, x_1111); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy15[4U] = { 0U }; + memcpy(x_copy15, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy15);); + uint64_t x_copy15[4U] = { 0U }; + memcpy(x_copy15, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy15, r); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy16[4U] = { 0U }; + memcpy(x_copy16, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy16);); + uint64_t x_copy16[4U] = { 0U }; + memcpy(x_copy16, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy16, r); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy17[4U] = { 0U }; + memcpy(x_copy17, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy17);); + uint64_t x_copy17[4U] = { 0U }; + memcpy(x_copy17, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy17, x_1111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy18[4U] = { 0U }; + memcpy(x_copy18, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy18);); + uint64_t x_copy18[4U] = { 0U }; + memcpy(x_copy18, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy18, 
x_111); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t x_copy19[4U] = { 0U }; + memcpy(x_copy19, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy19);); + uint64_t x_copy19[4U] = { 0U }; + memcpy(x_copy19, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy19, x_111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy20[4U] = { 0U }; + memcpy(x_copy20, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy20);); + uint64_t x_copy20[4U] = { 0U }; + memcpy(x_copy20, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy20, x_111); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy21[4U] = { 0U }; + memcpy(x_copy21, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy21);); + uint64_t x_copy21[4U] = { 0U }; + memcpy(x_copy21, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy21, x_101); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy22[4U] = { 0U }; + memcpy(x_copy22, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy22);); + uint64_t x_copy22[4U] = { 0U }; + memcpy(x_copy22, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy22, x_11); + KRML_MAYBE_FOR10(i, + 0U, + 10U, + 1U, + uint64_t x_copy23[4U] = { 0U }; + memcpy(x_copy23, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy23);); + uint64_t x_copy23[4U] = { 0U }; + memcpy(x_copy23, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy23, x_101111); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t x_copy24[4U] = { 0U }; + memcpy(x_copy24, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy24);); + uint64_t x_copy24[4U] = { 0U }; + memcpy(x_copy24, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy24, x_11); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy25[4U] = { 0U }; + memcpy(x_copy25, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy25);); + uint64_t x_copy25[4U] = { 0U }; + memcpy(x_copy25, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy25, x_11); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t x_copy26[4U] = { 0U }; + memcpy(x_copy26, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, 
x_copy26);); + uint64_t x_copy26[4U] = { 0U }; + memcpy(x_copy26, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy26, x_11); + KRML_MAYBE_FOR3(i, + 0U, + 3U, + 1U, + uint64_t x_copy27[4U] = { 0U }; + memcpy(x_copy27, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy27);); + uint64_t x_copy27[4U] = { 0U }; + memcpy(x_copy27, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy27, r); + KRML_MAYBE_FOR7(i, + 0U, + 7U, + 1U, + uint64_t x_copy28[4U] = { 0U }; + memcpy(x_copy28, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy28);); + uint64_t x_copy28[4U] = { 0U }; + memcpy(x_copy28, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy28, x_10101); + KRML_MAYBE_FOR6(i, + 0U, + 6U, + 1U, + uint64_t x_copy29[4U] = { 0U }; + memcpy(x_copy29, tmp1, 4U * sizeof (uint64_t)); + qsqr(tmp1, x_copy29);); + uint64_t x_copy29[4U] = { 0U }; + memcpy(x_copy29, tmp1, 4U * sizeof (uint64_t)); + qmul(tmp1, x_copy29, x_1111); memcpy(x6, tmp1, 4U * sizeof (uint64_t)); memcpy(res, x6, 4U * sizeof (uint64_t)); } @@ -1435,7 +1977,9 @@ ecdsa_verify_msg_as_qelem( } uint64_t x[4U] = { 0U }; to_aff_point_x(x, res); - qmod_short(x, x); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, x, 4U * sizeof (uint64_t)); + qmod_short(x, x_copy); bool res1 = bn_is_eq_vartime4(x, r_q); return res1; } @@ -1464,9 +2008,9 @@ ecdsa_sign_msg_as_qelem( 0U, 4U, 1U, - uint64_t *os = d_a; uint64_t uu____0 = oneq0[i]; uint64_t x = uu____0 ^ (is_b_valid0 & (d_a[i] ^ uu____0)); + uint64_t *os = d_a; os[i] = x;); uint64_t is_sk_valid = is_b_valid0; bn_from_bytes_be4(k_q, nonce); @@ -1480,22 +2024,30 @@ ecdsa_sign_msg_as_qelem( 0U, 4U, 1U, - uint64_t *os = k_q; uint64_t uu____1 = oneq[i]; uint64_t x = uu____1 ^ (is_b_valid & (k_q[i] ^ uu____1)); + uint64_t *os = k_q; os[i] = x;); uint64_t is_nonce_valid = is_b_valid; uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid; uint64_t p[12U] = { 0U }; point_mul_g(p, k_q); to_aff_point_x(r_q, p); - qmod_short(r_q, r_q); + uint64_t x_copy0[4U] = { 0U }; + memcpy(x_copy0, 
r_q, 4U * sizeof (uint64_t)); + qmod_short(r_q, x_copy0); uint64_t kinv[4U] = { 0U }; qinv(kinv, k_q); qmul(s_q, r_q, d_a); - from_qmont(m_q, m_q); - qadd(s_q, m_q, s_q); - qmul(s_q, kinv, s_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + from_qmont(m_q, x_copy); + uint64_t y_copy[4U] = { 0U }; + memcpy(y_copy, s_q, 4U * sizeof (uint64_t)); + qadd(s_q, m_q, y_copy); + uint64_t y_copy0[4U] = { 0U }; + memcpy(y_copy0, s_q, 4U * sizeof (uint64_t)); + qmul(s_q, kinv, y_copy0); bn2_to_bytes_be4(signature, r_q, s_q); uint64_t is_r_zero = bn_is_zero_mask4(r_q); uint64_t is_s_zero = bn_is_zero_mask4(s_q); @@ -1551,7 +2103,9 @@ Hacl_P256_ecdsa_sign_p256_sha2( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1584,7 +2138,9 @@ Hacl_P256_ecdsa_sign_p256_sha384( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1617,7 +2173,9 @@ Hacl_P256_ecdsa_sign_p256_sha512( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1660,7 +2218,9 @@ Hacl_P256_ecdsa_sign_p256_without_hash( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + 
qmod_short(m_q, x_copy); bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); return res; } @@ -1696,7 +2256,9 @@ Hacl_P256_ecdsa_verif_p256_sha2( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1727,7 +2289,9 @@ Hacl_P256_ecdsa_verif_p256_sha384( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1758,7 +2322,9 @@ Hacl_P256_ecdsa_verif_p256_sha512( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1794,7 +2360,9 @@ Hacl_P256_ecdsa_verif_without_hash( KRML_MAYBE_UNUSED_VAR(msg_len); uint8_t *mHash32 = mHash; bn_from_bytes_be4(m_q, mHash32); - qmod_short(m_q, m_q); + uint64_t x_copy[4U] = { 0U }; + memcpy(x_copy, m_q, 4U * sizeof (uint64_t)); + qmod_short(m_q, x_copy); bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); return res; } @@ -1891,8 +2459,8 @@ bool Hacl_P256_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw) { uint64_t xa[4U] = { 0U }; uint64_t ya[4U] = { 0U }; - uint8_t *pk_xb = pk + 1U; bool b = aff_point_decompress_vartime(xa, ya, pk); + uint8_t *pk_xb = pk + 1U; if (b) { memcpy(pk_raw, pk_xb, 32U * sizeof (uint8_t)); diff --git a/src/Hacl_RSAPSS.c b/src/Hacl_RSAPSS.c index 71e141d0..fa8244e3 100644 --- 
a/src/Hacl_RSAPSS.c +++ b/src/Hacl_RSAPSS.c @@ -167,7 +167,7 @@ static inline uint64_t check_num_bits_u64(uint32_t bs, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; return res; @@ -189,7 +189,7 @@ static inline uint64_t check_modulus_u64(uint32_t modBits, uint64_t *n) { uint64_t beq = FStar_UInt64_eq_mask(b2[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b2[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; uint64_t m1 = res; @@ -252,8 +252,8 @@ pss_encode( mgf_hash(a, hLen, m1Hash, dbLen, dbMask); for (uint32_t i = 0U; i < dbLen; i++) { - uint8_t *os = db; uint8_t x = (uint32_t)db[i] ^ (uint32_t)dbMask[i]; + uint8_t *os = db; os[i] = x; } uint32_t msBits = emBits % 8U; @@ -288,11 +288,7 @@ pss_verify( em_0 = 0U; } uint8_t em_last = em[emLen - 1U]; - if (emLen < saltLen + hash_len(a) + 2U) - { - return false; - } - if (!(em_last == 0xbcU && em_0 == 0U)) + if (emLen < saltLen + hash_len(a) + 2U || !(em_last == 0xbcU && em_0 == 0U)) { return false; } @@ -310,8 +306,8 @@ pss_verify( mgf_hash(a, hLen, m1Hash, dbLen, dbMask); for (uint32_t i = 0U; i < dbLen; i++) { - uint8_t *os = dbMask; uint8_t x = (uint32_t)dbMask[i] ^ (uint32_t)maskedDB[i]; + uint8_t *os = dbMask; os[i] = x; } uint32_t msBits1 = emBits % 8U; @@ -486,9 +482,9 @@ Hacl_RSAPSS_rsapss_sign( uint64_t eq_m = mask1; for (uint32_t i = 0U; i < nLen2; i++) { - uint64_t *os = s; uint64_t x = s[i]; uint64_t x0 = eq_m & x; + uint64_t *os = s; os[i] = x0; } bool eq_b = eq_m == 0xFFFFFFFFFFFFFFFFULL; @@ -553,7 +549,7 @@ Hacl_RSAPSS_rsapss_verify( { uint64_t beq = FStar_UInt64_eq_mask(s[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(s[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | 
(~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t mask = acc; bool res; @@ -568,10 +564,9 @@ Hacl_RSAPSS_rsapss_verify( eBits, e, m); - bool ite; if (!((modBits - 1U) % 8U == 0U)) { - ite = true; + res = true; } else { @@ -579,15 +574,7 @@ Hacl_RSAPSS_rsapss_verify( uint32_t j = (modBits - 1U) % 64U; uint64_t tmp = m[i]; uint64_t get_bit = tmp >> j & 1ULL; - ite = get_bit == 0ULL; - } - if (ite) - { - res = true; - } - else - { - res = false; + res = get_bit == 0ULL; } } else diff --git a/src/Hacl_SHA2_Vec128.c b/src/Hacl_SHA2_Vec128.c index 02af75b1..e122dd8c 100644 --- a/src/Hacl_SHA2_Vec128.c +++ b/src/Hacl_SHA2_Vec128.c @@ -35,9 +35,9 @@ static inline void sha224_init4(Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h224[i]; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_load32(hi); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } @@ -286,9 +286,9 @@ sha224_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } @@ -515,9 +515,9 @@ static inline void sha256_init4(Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h256[i]; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_load32(hi); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } @@ -766,9 +766,9 @@ sha256_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec128 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec128 *os = hash; Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec128 *os = hash; os[i] = x;); } diff --git a/src/Hacl_SHA2_Vec256.c b/src/Hacl_SHA2_Vec256.c 
index c34767f5..2bee1692 100644 --- a/src/Hacl_SHA2_Vec256.c +++ b/src/Hacl_SHA2_Vec256.c @@ -36,9 +36,9 @@ static inline void sha224_init8(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h224[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load32(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -371,9 +371,9 @@ sha224_update8(Hacl_Hash_SHA2_uint8_8p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -785,9 +785,9 @@ static inline void sha256_init8(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint32_t hi = Hacl_Hash_SHA2_h256[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load32(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1120,9 +1120,9 @@ sha256_update8(Hacl_Hash_SHA2_uint8_8p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1534,9 +1534,9 @@ static inline void sha384_init4(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint64_t hi = Hacl_Hash_SHA2_h384[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1769,9 +1769,9 @@ sha384_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add64(hash[i], hash_old[i]); + 
Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -1990,9 +1990,9 @@ static inline void sha512_init4(Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; uint64_t hi = Hacl_Hash_SHA2_h512[i]; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } @@ -2225,9 +2225,9 @@ sha512_update4(Hacl_Hash_SHA2_uint8_4p b, Lib_IntVector_Intrinsics_vec256 *hash) 0U, 8U, 1U, - Lib_IntVector_Intrinsics_vec256 *os = hash; Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add64(hash[i], hash_old[i]); + Lib_IntVector_Intrinsics_vec256 *os = hash; os[i] = x;); } diff --git a/src/Hacl_Salsa20.c b/src/Hacl_Salsa20.c index 151df07d..372fd3c5 100644 --- a/src/Hacl_Salsa20.c +++ b/src/Hacl_Salsa20.c @@ -85,8 +85,8 @@ static inline void salsa20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) 0U, 16U, 1U, - uint32_t *os = k; uint32_t x = k[i] + ctx[i]; + uint32_t *os = k; os[i] = x;); k[8U] = k[8U] + ctr_u32; } @@ -101,21 +101,21 @@ static inline void salsa20_key_block0(uint8_t *out, uint8_t *key, uint8_t *n) 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); ctx[0U] = 0x61707865U; uint32_t *k0 = k32; @@ -149,21 +149,21 @@ salsa20_encrypt( 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); ctx[0U] = 0x61707865U; uint32_t *k0 = k32; @@ -192,18 +192,18 @@ salsa20_encrypt( 0U, 16U, 1U, - 
uint32_t *os = bl; uint8_t *bj = uu____1 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(uu____0 + i * 4U, bl[i]);); } @@ -219,18 +219,18 @@ salsa20_encrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = plain + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(plain + i * 4U, bl[i]);); memcpy(uu____2, plain, rem * sizeof (uint8_t)); @@ -254,21 +254,21 @@ salsa20_decrypt( 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR2(i, 0U, 2U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); ctx[0U] = 0x61707865U; uint32_t *k0 = k32; @@ -297,18 +297,18 @@ salsa20_decrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = uu____1 + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(uu____0 + i * 4U, bl[i]);); } @@ -324,18 +324,18 @@ salsa20_decrypt( 0U, 16U, 1U, - uint32_t *os = bl; uint8_t *bj = plain + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, - uint32_t *os = bl; uint32_t x = bl[i] ^ k1[i]; + uint32_t *os = bl; os[i] = x;); KRML_MAYBE_FOR16(i, 0U, 16U, 1U, store32_le(plain + i * 4U, bl[i]);); memcpy(uu____2, plain, rem * sizeof (uint8_t)); 
@@ -351,21 +351,21 @@ static inline void hsalsa20(uint8_t *out, uint8_t *key, uint8_t *n) 0U, 8U, 1U, - uint32_t *os = k32; uint8_t *bj = key + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = k32; os[i] = x;); KRML_MAYBE_FOR4(i, 0U, 4U, 1U, - uint32_t *os = n32; uint8_t *bj = n + i * 4U; uint32_t u = load32_le(bj); uint32_t r = u; uint32_t x = r; + uint32_t *os = n32; os[i] = x;); uint32_t *k0 = k32; uint32_t *k1 = k32 + 4U; diff --git a/src/Lib_Memzero0.c b/src/Lib_Memzero0.c index 3d8a1e5f..5c269d23 100644 --- a/src/Lib_Memzero0.c +++ b/src/Lib_Memzero0.c @@ -13,7 +13,7 @@ #include #endif -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined(__NetBSD__) #include #endif @@ -36,7 +36,7 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len) { size_t len_ = (size_t) len; #ifdef _WIN32 - SecureZeroMemory(dst, len); + SecureZeroMemory(dst, len_); #elif defined(__APPLE__) && defined(__MACH__) memset_s(dst, len_, 0, len_); #elif (defined(__linux__) && !defined(LINUX_NO_EXPLICIT_BZERO)) || defined(__FreeBSD__) diff --git a/src/msvc/EverCrypt_AEAD.c b/src/msvc/EverCrypt_AEAD.c index b0fb4826..89965054 100644 --- a/src/msvc/EverCrypt_AEAD.c +++ b/src/msvc/EverCrypt_AEAD.c @@ -538,26 +538,27 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm_no_check( KRML_MAYBE_UNUSED_VAR(cipher); KRML_MAYBE_UNUSED_VAR(tag); #if HACL_CAN_COMPILE_VALE - uint8_t ek[480U] = { 0U }; - uint8_t *keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 176U; + uint8_t ek0[480U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 176U; aes128_key_expansion(k, keys_b0); aes128_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - 
KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 304U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 304U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 176U; uint8_t tmp_iv[16U] = { 0U }; @@ -637,8 +638,9 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm_no_check( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, (uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; #else KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", @@ -680,26 +682,27 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm_no_check( KRML_MAYBE_UNUSED_VAR(cipher); KRML_MAYBE_UNUSED_VAR(tag); #if HACL_CAN_COMPILE_VALE - uint8_t ek[544U] = { 0U }; - uint8_t *keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 240U; + uint8_t ek0[544U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 240U; aes256_key_expansion(k, keys_b0); aes256_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 368U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 368U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 240U; uint8_t tmp_iv[16U] = { 0U }; @@ -779,8 +782,9 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm_no_check( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, 
(uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; #else KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", @@ -821,26 +825,27 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm( bool has_aesni = EverCrypt_AutoConfig2_has_aesni(); if (has_aesni && has_pclmulqdq && has_avx && has_sse && has_movbe) { - uint8_t ek[480U] = { 0U }; - uint8_t *keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 176U; + uint8_t ek0[480U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 176U; aes128_key_expansion(k, keys_b0); aes128_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES128, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 304U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 304U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 176U; uint8_t tmp_iv[16U] = { 0U }; @@ -920,8 +925,9 @@ EverCrypt_AEAD_encrypt_expand_aes128_gcm( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, (uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; } return EverCrypt_Error_UnsupportedAlgorithm; @@ -960,26 +966,27 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm( bool has_aesni = EverCrypt_AutoConfig2_has_aesni(); if (has_aesni && has_pclmulqdq && has_avx && has_sse && has_movbe) { - uint8_t ek[544U] = { 0U }; - uint8_t 
*keys_b0 = ek; - uint8_t *hkeys_b0 = ek + 240U; + uint8_t ek0[544U] = { 0U }; + uint8_t *keys_b0 = ek0; + uint8_t *hkeys_b0 = ek0 + 240U; aes256_key_expansion(k, keys_b0); aes256_keyhash_init(keys_b0, hkeys_b0); - EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek }; + EverCrypt_AEAD_state_s p = { .impl = Spec_Cipher_Expansion_Vale_AES256, .ek = ek0 }; EverCrypt_AEAD_state_s *s = &p; + EverCrypt_Error_error_code r; if (s == NULL) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidKey); + r = EverCrypt_Error_InvalidKey; } else if (iv_len == 0U) { - KRML_HOST_IGNORE(EverCrypt_Error_InvalidIVLength); + r = EverCrypt_Error_InvalidIVLength; } else { - uint8_t *ek0 = (*s).ek; - uint8_t *scratch_b = ek0 + 368U; - uint8_t *ek1 = ek0; + uint8_t *ek = (*s).ek; + uint8_t *scratch_b = ek + 368U; + uint8_t *ek1 = ek; uint8_t *keys_b = ek1; uint8_t *hkeys_b = ek1 + 240U; uint8_t tmp_iv[16U] = { 0U }; @@ -1059,8 +1066,9 @@ EverCrypt_AEAD_encrypt_expand_aes256_gcm( memcpy(cipher + (uint32_t)(uint64_t)plain_len / 16U * 16U, inout_b, (uint32_t)(uint64_t)plain_len % 16U * sizeof (uint8_t)); - KRML_HOST_IGNORE(EverCrypt_Error_Success); + r = EverCrypt_Error_Success; } + KRML_MAYBE_UNUSED_VAR(r); return EverCrypt_Error_Success; } return EverCrypt_Error_UnsupportedAlgorithm; diff --git a/src/msvc/EverCrypt_HMAC.c b/src/msvc/EverCrypt_HMAC.c index 386cb17f..baa3b864 100644 --- a/src/msvc/EverCrypt_HMAC.c +++ b/src/msvc/EverCrypt_HMAC.c @@ -81,10 +81,8 @@ EverCrypt_HMAC_compute_sha1( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -105,19 +103,17 @@ EverCrypt_HMAC_compute_sha1( { Hacl_Hash_SHA1_hash_oneshot(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = 
(uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -189,10 +185,8 @@ EverCrypt_HMAC_compute_sha2_256( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -213,19 +207,17 @@ EverCrypt_HMAC_compute_sha2_256( { EverCrypt_HMAC_hash_256(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -311,10 +303,8 @@ EverCrypt_HMAC_compute_sha2_384( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - 
uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -335,19 +325,17 @@ EverCrypt_HMAC_compute_sha2_384( { Hacl_Hash_SHA2_hash_384(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -441,10 +429,8 @@ EverCrypt_HMAC_compute_sha2_512( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -465,19 +451,17 @@ EverCrypt_HMAC_compute_sha2_512( { Hacl_Hash_SHA2_hash_512(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t 
*opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -571,10 +555,8 @@ EverCrypt_HMAC_compute_blake2s( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -595,19 +577,17 @@ EverCrypt_HMAC_compute_blake2s( { Hacl_Hash_Blake2s_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -620,7 +600,7 @@ EverCrypt_HMAC_compute_blake2s( if (data_len == 0U) { uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -655,6 +635,7 @@ EverCrypt_HMAC_compute_blake2s( Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -693,6 +674,7 @@ EverCrypt_HMAC_compute_blake2s( 
Hacl_Hash_Blake2s_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -708,10 +690,8 @@ EverCrypt_HMAC_compute_blake2b( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -732,19 +712,17 @@ EverCrypt_HMAC_compute_blake2b( { Hacl_Hash_Blake2b_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -757,7 +735,13 @@ EverCrypt_HMAC_compute_blake2b( if (data_len == 0U) { uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -792,6 +776,7 @@ EverCrypt_HMAC_compute_blake2b( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, @@ -831,6 +816,7 @@ EverCrypt_HMAC_compute_blake2b( 
Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/msvc/EverCrypt_Hash.c b/src/msvc/EverCrypt_Hash.c index bfafa9be..153063cc 100644 --- a/src/msvc/EverCrypt_Hash.c +++ b/src/msvc/EverCrypt_Hash.c @@ -616,7 +616,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ { uint32_t *p1 = scrut.case_Blake2S_s; uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; } if (scrut.tag == Blake2S_128_s) @@ -624,7 +624,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Lib_IntVector_Intrinsics_vec128 *p1 = scrut.case_Blake2S_128_s; #if HACL_CAN_COMPILE_VEC128 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, prev_len, last_len, last); + Hacl_Hash_Blake2s_Simd128_update_last(last_len, wv, p1, false, prev_len, last_len, last); return; #else KRML_MAYBE_UNUSED_VAR(p1); @@ -638,6 +638,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); @@ -651,6 +652,7 @@ update_last(EverCrypt_Hash_state_s *s, uint64_t prev_len, uint8_t *last, uint32_ Hacl_Hash_Blake2b_Simd256_update_last(last_len, wv, p1, + false, FStar_UInt128_uint64_to_uint128(prev_len), last_len, last); diff --git a/src/msvc/Hacl_Bignum.c b/src/msvc/Hacl_Bignum.c index b99423f3..a87f2267 100644 --- a/src/msvc/Hacl_Bignum.c +++ b/src/msvc/Hacl_Bignum.c @@ -832,7 +832,7 @@ uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = 
~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m1 = acc; return m0 & m1; @@ -1023,7 +1023,7 @@ uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; return m0 & m1; @@ -1415,7 +1415,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m10 = acc0; uint32_t m00 = m0 & m10; @@ -1442,7 +1442,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( { uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t res = acc; m1 = res; @@ -1456,7 +1456,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t m = m1 & m2; @@ -1809,7 +1809,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m10 = acc0; uint64_t m00 = m0 & m10; @@ -1836,7 +1836,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = 
~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; m1 = res; @@ -1850,7 +1850,7 @@ Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t m = m1 & m2; diff --git a/src/msvc/Hacl_Bignum256.c b/src/msvc/Hacl_Bignum256.c index a4f00b83..bd67656b 100644 --- a/src/msvc/Hacl_Bignum256.c +++ b/src/msvc/Hacl_Bignum256.c @@ -512,7 +512,7 @@ bool Hacl_Bignum256_mod(uint64_t *n, uint64_t *a, uint64_t *res) 1U, uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t m1 = acc; uint64_t is_valid_m = m0 & m1; uint32_t nBits = 64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64(4U, n); @@ -544,7 +544,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc0 = (beq & acc0) | (~beq & blt);); uint64_t m10 = acc0; uint64_t m00 = m0 & m10; uint32_t bLen; @@ -570,7 +570,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; m1 = res; @@ -586,7 +586,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) 1U, uint64_t beq = 
FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t m2 = acc; uint64_t m = m1 & m2; return m00 & m; @@ -990,7 +990,7 @@ bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *re 1U, uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc0 = (beq & acc0) | (~beq & blt);); uint64_t m1 = acc0; uint64_t m00 = m0 & m1; uint64_t bn_zero[4U] = { 0U }; @@ -1011,7 +1011,7 @@ bool Hacl_Bignum256_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *re 1U, uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t m2 = acc; uint64_t is_valid_m = (m00 & ~m10) & m2; uint32_t nBits = 64U * (uint32_t)Hacl_Bignum_Lib_bn_get_top_index_u64(4U, n); @@ -1351,7 +1351,7 @@ uint64_t Hacl_Bignum256_lt_mask(uint64_t *a, uint64_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); return acc; } diff --git a/src/msvc/Hacl_Bignum256_32.c b/src/msvc/Hacl_Bignum256_32.c index 29a5a52e..b4490e6c 100644 --- a/src/msvc/Hacl_Bignum256_32.c +++ b/src/msvc/Hacl_Bignum256_32.c @@ -532,7 +532,7 @@ bool Hacl_Bignum256_32_mod(uint32_t *n, uint32_t *a, uint32_t *res) 1U, uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); uint32_t m1 = acc; uint32_t is_valid_m = m0 & m1; uint32_t nBits = 
32U * Hacl_Bignum_Lib_bn_get_top_index_u32(8U, n); @@ -564,7 +564,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) 1U, uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc0 = (beq & acc0) | (~beq & blt);); uint32_t m10 = acc0; uint32_t m00 = m0 & m10; uint32_t bLen; @@ -590,7 +590,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t res = acc; m1 = res; @@ -606,7 +606,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) 1U, uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); uint32_t m2 = acc; uint32_t m = m1 & m2; return m00 & m; @@ -1010,7 +1010,7 @@ bool Hacl_Bignum256_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t 1U, uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc0 = (beq & acc0) | (~beq & blt);); uint32_t m1 = acc0; uint32_t m00 = m0 & m1; uint32_t bn_zero[8U] = { 0U }; @@ -1031,7 +1031,7 @@ bool Hacl_Bignum256_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t 1U, uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); uint32_t m2 = acc; uint32_t is_valid_m = (m00 & ~m10) & m2; uint32_t nBits = 32U * Hacl_Bignum_Lib_bn_get_top_index_u32(8U, n); @@ -1399,7 
+1399,7 @@ uint32_t Hacl_Bignum256_32_lt_mask(uint32_t *a, uint32_t *b) 1U, uint32_t beq = FStar_UInt32_eq_mask(a[i], b[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U)));); + acc = (beq & acc) | (~beq & blt);); return acc; } diff --git a/src/msvc/Hacl_Bignum32.c b/src/msvc/Hacl_Bignum32.c index 55c3f90c..dcb7b7ec 100644 --- a/src/msvc/Hacl_Bignum32.c +++ b/src/msvc/Hacl_Bignum32.c @@ -46,9 +46,18 @@ of `len` unsigned 32-bit integers, i.e. uint32_t[len]. /** Write `a + b mod 2 ^ (32 * len)` in `res`. - This functions returns the carry. - - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + This function returns the carry. + + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly equal memory + location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_add(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -60,7 +69,16 @@ Write `a - b mod 2 ^ (32 * len)` in `res`. This functions returns the carry. - The arguments a, b and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len] + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. 
Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[out] res Points to `len` number of limbs where the carry is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. */ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -70,12 +88,23 @@ uint32_t Hacl_Bignum32_sub(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res /** Write `(a + b) mod n` in `res`. - The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -85,12 +114,23 @@ void Hacl_Bignum32_add_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, /** Write `(a - b) mod n` in `res`. 
- The arguments a, b, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • a < n - • b < n + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `b` or `res`. May have exactly + equal memory location to `b` or `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must not + partially overlap the memory locations of `a` or `res`. May have exactly + equal memory location to `a` or `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `res`. + @param[out] res Points to `len` number of limbs where the result is written, i.e. `uint32_t[len]`. + Must not partially overlap the memory locations of `a` or `b`. May have + exactly equal memory location to `a` or `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `a < n` + - `b < n` */ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -100,8 +140,13 @@ void Hacl_Bignum32_sub_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *b, /** Write `a * b` in `res`. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] len Number of limbs. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `b` and `res`. + @param[in] b Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. 
+ Must be disjoint from the memory locations of `a` and `b`. */ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) { @@ -114,8 +159,10 @@ void Hacl_Bignum32_mul(uint32_t len, uint32_t *a, uint32_t *b, uint32_t *res) /** Write `a * a` in `res`. - The argument a is meant to be `len` limbs in size, i.e. uint32_t[len]. - The outparam res is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `2*len` number of limbs where the result is written, i.e. `uint32_t[2*len]`. + Must be disjoint from the memory location of `a`. */ void Hacl_Bignum32_sqr(uint32_t len, uint32_t *a, uint32_t *res) { @@ -149,13 +196,19 @@ bn_slow_precomp( /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The argument n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • 1 < n - • n % 2 = 1 + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `n`. + + @return `false` if any precondition is violated, `true` otherwise. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `1 < n` + - `n % 2 = 1` */ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -171,7 +224,7 @@ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m1 = acc; uint32_t is_valid_m = m0 & m1; @@ -195,22 +248,30 @@ bool Hacl_Bignum32_mod(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. 
+ @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_vartime( @@ -238,22 +299,30 @@ Hacl_Bignum32_mod_exp_vartime( /** Write `a ^ b mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. - - This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime. - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • b < pow2 bBits - • a < n + This function is constant-time over its argument `b`, at the cost of a slower + execution time than `mod_exp_vartime_*`. + + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. 
Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a`, `b`, and `n`. + + @return `false` if any preconditions are violated, `true` otherwise. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` + - `b < pow2 bBits` + - `a < n` */ bool Hacl_Bignum32_mod_exp_consttime( @@ -281,18 +350,23 @@ Hacl_Bignum32_mod_exp_consttime( /** Write `a ^ (-1) mod n` in `res`. - The arguments a, n and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - - The function returns false if any of the following preconditions are violated, - true otherwise. - • n % 2 = 1 - • 1 < n - • 0 < a - • a < n + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `n` and `res`. + @param[in] n Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a` and `n`. + + @return `false` if any preconditions (except the precondition: `n` is a prime) + are violated, `true` otherwise. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `n % 2 = 1` + - `1 < n` + - `0 < a` + - `a < n` */ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, uint32_t *res) { @@ -308,7 +382,7 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m1 = acc0; uint32_t m00 = m0 & m1; @@ -329,7 +403,7 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t is_valid_m = (m00 & ~m10) & m2; @@ -393,15 +467,16 @@ bool Hacl_Bignum32_mod_inv_prime_vartime(uint32_t len, uint32_t *n, uint32_t *a, /** Heap-allocate and initialize a montgomery context. - The argument n is meant to be `len` limbs in size, i.e. uint32_t[len]. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n % 2 = 1 - • 1 < n + @param n Points to `len` number of limbs, i.e. `uint32_t[len]`. - The caller will need to call Hacl_Bignum32_mont_ctx_free on the return value - to avoid memory leaks. + @return A pointer to an allocated and initialized Montgomery context is returned. + Clients will need to call `Hacl_Bignum32_mont_ctx_free` on the return value to + avoid memory leaks. 
+ + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n % 2 = 1` + - `1 < n` */ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *Hacl_Bignum32_mont_ctx_init(uint32_t len, uint32_t *n) @@ -429,7 +504,7 @@ Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 /** Deallocate the memory previously allocated by Hacl_Bignum32_mont_ctx_init. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. */ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) { @@ -444,9 +519,11 @@ void Hacl_Bignum32_mont_ctx_free(Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32 *k) /** Write `a mod n` in `res`. - The argument a is meant to be `2*len` limbs in size, i.e. uint32_t[2*len]. - The outparam res is meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `2*len` number of limbs, i.e. `uint32_t[2*len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `a`. */ void Hacl_Bignum32_mod_precomp( @@ -464,21 +541,25 @@ Hacl_Bignum32_mod_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. 
- - The function is *NOT* constant-time on the argument b. See the - mod_exp_consttime_* functions for constant-time variants. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + This function is *NOT* constant-time on the argument `b`. See the + `mod_exp_consttime_*` functions for constant-time variants. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_vartime_precomp( @@ -505,21 +586,25 @@ Hacl_Bignum32_mod_exp_vartime_precomp( /** Write `a ^ b mod n` in `res`. - The arguments a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - The argument b is a bignum of any size, and bBits is an upper bound on the - number of significant bits of b. A tighter bound results in faster execution - time. When in doubt, the number of bits for the bignum size is always a safe - default, e.g. if b is a 4096-bit bignum, bBits should be 4096. 
- This function is constant-time over its argument b, at the cost of a slower - execution time than mod_exp_vartime_*. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • b < pow2 bBits - • a < n + execution time than `mod_exp_vartime_*`. + + @param[in] k Points to a Montgomery context obtained from `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[in] b Points to a bignum of any size, with an upper bound of `bBits` number of + significant bits. Must be disjoint from the memory location of `res`. + @param[in] bBits An upper bound on the number of significant bits of `b`. + A tighter bound results in faster execution time. When in doubt, the number + of bits for the bignum size is always a safe default, e.g. if `b` is a 4096-bit + bignum, `bBits` should be `4096`. + @param[out] res Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory locations of `a` and `b`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `b < pow2 bBits` + - `a < n` */ void Hacl_Bignum32_mod_exp_consttime_precomp( @@ -546,14 +631,17 @@ Hacl_Bignum32_mod_exp_consttime_precomp( /** Write `a ^ (-1) mod n` in `res`. - The argument a and the outparam res are meant to be `len` limbs in size, i.e. uint32_t[len]. - The argument k is a montgomery context obtained through Hacl_Bignum32_mont_ctx_init. - - Before calling this function, the caller will need to ensure that the following - preconditions are observed. - • n is a prime - • 0 < a - • a < n + @param[in] k Points to a Montgomery context obtained through `Hacl_Bignum32_mont_ctx_init`. + @param[in] a Points to `len` number of limbs, i.e. `uint32_t[len]`. Must be + disjoint from the memory location of `res`. + @param[out] res Points to `len` number of limbs, i.e. 
`uint32_t[len]`. Must be + disjoint from the memory location of `a`. + + @pre Before calling this function, the caller will need to ensure that the following + preconditions are observed: + - `n` is a prime + - `0 < a` + - `a < n` */ void Hacl_Bignum32_mod_inv_prime_vartime_precomp( @@ -623,13 +711,13 @@ Hacl_Bignum32_mod_inv_prime_vartime_precomp( /** Load a bid-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) { @@ -664,13 +752,13 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_be(uint32_t len, uint8_t *b) /** Load a little-endian bignum from memory. - The argument b points to `len` bytes of valid memory. - The function returns a heap-allocated bignum of size sufficient to hold the - result of loading b, or NULL if either the allocation failed, or the amount of - required memory would exceed 4GB. - - If the return value is non-null, clients must eventually call free(3) on it to - avoid memory leaks. + @param len Size of `b` as number of bytes. + @param b Points to `len` number of bytes, i.e. `uint8_t[len]`. + + @return A heap-allocated bignum of size sufficient to hold the result of + loading `b`. 
Otherwise, `NULL`, if either the allocation failed, or the amount + of required memory would exceed 4GB. Clients must `free(3)` any non-null return + value to avoid memory leaks. */ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) { @@ -707,8 +795,11 @@ uint32_t *Hacl_Bignum32_new_bn_from_bytes_le(uint32_t len, uint8_t *b) /** Serialize a bignum into big-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) { @@ -727,8 +818,11 @@ void Hacl_Bignum32_bn_to_bytes_be(uint32_t len, uint32_t *b, uint8_t *res) /** Serialize a bignum into little-endian memory. - The argument b points to a bignum of ⌈len / 4⌉ size. - The outparam res points to `len` bytes of valid memory. + @param[in] len Size of `b` as number of bytes. + @param[in] b Points to a bignum of `ceil(len/4)` size. Must be disjoint from + the memory location of `res`. + @param[out] res Points to `len` number of bytes, i.e. `uint8_t[len]`. Must be + disjoint from the memory location of `b`. */ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) { @@ -753,7 +847,11 @@ void Hacl_Bignum32_bn_to_bytes_le(uint32_t len, uint32_t *b, uint8_t *res) /** Returns 2^32 - 1 if a < b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if `a < b`, otherwise, `0`. 
*/ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) { @@ -762,7 +860,7 @@ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(a[i], b[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } return acc; } @@ -770,7 +868,11 @@ uint32_t Hacl_Bignum32_lt_mask(uint32_t len, uint32_t *a, uint32_t *b) /** Returns 2^32 - 1 if a = b, otherwise returns 0. - The arguments a and b are meant to be `len` limbs in size, i.e. uint32_t[len]. + @param len Number of limbs. + @param a Points to `len` number of limbs, i.e. `uint32_t[len]`. + @param b Points to `len` number of limbs, i.e. `uint32_t[len]`. + + @return `2^32 - 1` if a = b, otherwise, `0`. */ uint32_t Hacl_Bignum32_eq_mask(uint32_t len, uint32_t *a, uint32_t *b) { diff --git a/src/msvc/Hacl_Bignum4096.c b/src/msvc/Hacl_Bignum4096.c index 920ae2fb..c7c24306 100644 --- a/src/msvc/Hacl_Bignum4096.c +++ b/src/msvc/Hacl_Bignum4096.c @@ -459,7 +459,7 @@ bool Hacl_Bignum4096_mod(uint64_t *n, uint64_t *a, uint64_t *res) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; uint64_t is_valid_m = m0 & m1; @@ -490,7 +490,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m10 = acc0; uint64_t m00 = m0 & m10; @@ -517,7 +517,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], 
b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; m1 = res; @@ -531,7 +531,7 @@ static uint64_t exp_check(uint64_t *n, uint64_t *a, uint32_t bBits, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t m = m1 & m2; @@ -930,7 +930,7 @@ bool Hacl_Bignum4096_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *r { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m1 = acc0; uint64_t m00 = m0 & m1; @@ -949,7 +949,7 @@ bool Hacl_Bignum4096_mod_inv_prime_vartime(uint64_t *n, uint64_t *a, uint64_t *r { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t is_valid_m = (m00 & ~m10) & m2; @@ -1326,7 +1326,7 @@ uint64_t Hacl_Bignum4096_lt_mask(uint64_t *a, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } return acc; } diff --git a/src/msvc/Hacl_Bignum4096_32.c b/src/msvc/Hacl_Bignum4096_32.c index f3330918..0d54cb21 100644 --- a/src/msvc/Hacl_Bignum4096_32.c +++ b/src/msvc/Hacl_Bignum4096_32.c @@ -451,7 +451,7 @@ bool Hacl_Bignum4096_32_mod(uint32_t *n, uint32_t *a, uint32_t *res) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 
0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m1 = acc; uint32_t is_valid_m = m0 & m1; @@ -482,7 +482,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m10 = acc0; uint32_t m00 = m0 & m10; @@ -509,7 +509,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(b[i], b2[i]); uint32_t blt = ~FStar_UInt32_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t res = acc; m1 = res; @@ -523,7 +523,7 @@ static uint32_t exp_check(uint32_t *n, uint32_t *a, uint32_t bBits, uint32_t *b) { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t m = m1 & m2; @@ -922,7 +922,7 @@ bool Hacl_Bignum4096_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t { uint32_t beq = FStar_UInt32_eq_mask(one[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc0 = (beq & acc0) | (~beq & blt); } uint32_t m1 = acc0; uint32_t m00 = m0 & m1; @@ -941,7 +941,7 @@ bool Hacl_Bignum4096_32_mod_inv_prime_vartime(uint32_t *n, uint32_t *a, uint32_t { uint32_t beq = FStar_UInt32_eq_mask(a[i], n[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } uint32_t m2 = acc; uint32_t is_valid_m = (m00 & ~m10) & m2; @@ -1317,7 +1317,7 @@ uint32_t Hacl_Bignum4096_32_lt_mask(uint32_t *a, uint32_t *b) { uint32_t 
beq = FStar_UInt32_eq_mask(a[i], b[i]); uint32_t blt = ~FStar_UInt32_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFU) | (~blt & 0U))); + acc = (beq & acc) | (~beq & blt); } return acc; } diff --git a/src/msvc/Hacl_Bignum64.c b/src/msvc/Hacl_Bignum64.c index e64b1a54..499ca740 100644 --- a/src/msvc/Hacl_Bignum64.c +++ b/src/msvc/Hacl_Bignum64.c @@ -170,7 +170,7 @@ bool Hacl_Bignum64_mod(uint32_t len, uint64_t *n, uint64_t *a, uint64_t *res) { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; uint64_t is_valid_m = m0 & m1; @@ -307,7 +307,7 @@ bool Hacl_Bignum64_mod_inv_prime_vartime(uint32_t len, uint64_t *n, uint64_t *a, { uint64_t beq = FStar_UInt64_eq_mask(one[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(one[i], n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t m1 = acc0; uint64_t m00 = m0 & m1; @@ -328,7 +328,7 @@ bool Hacl_Bignum64_mod_inv_prime_vartime(uint32_t len, uint64_t *n, uint64_t *a, { uint64_t beq = FStar_UInt64_eq_mask(a[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m2 = acc; uint64_t is_valid_m = (m00 & ~m10) & m2; @@ -761,7 +761,7 @@ uint64_t Hacl_Bignum64_lt_mask(uint32_t len, uint64_t *a, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(a[i], b[i]); uint64_t blt = ~FStar_UInt64_gte_mask(a[i], b[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } return acc; } diff --git a/src/msvc/Hacl_Ed25519.c b/src/msvc/Hacl_Ed25519.c index d1f8edf2..61e379d2 100644 --- a/src/msvc/Hacl_Ed25519.c +++ b/src/msvc/Hacl_Ed25519.c @@ -509,11 +509,7 @@ 
static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) Hacl_Bignum25519_reduce_513(t01); reduce(t01); bool z1 = is_0(t01); - if (z1 == false) - { - res = false; - } - else + if (z1) { uint64_t *x32 = tmp + 5U; uint64_t *t0 = tmp + 10U; @@ -534,6 +530,10 @@ static inline bool recover_x(uint64_t *x, uint64_t *y, uint64_t sign) memcpy(x, x32, 5U * sizeof (uint64_t)); res = true; } + else + { + res = false; + } } } bool res0 = res; @@ -551,11 +551,7 @@ bool Hacl_Impl_Ed25519_PointDecompress_point_decompress(uint64_t *out, uint8_t * Hacl_Bignum25519_load_51(y, s); bool z0 = recover_x(x, y, sign); bool res; - if (z0 == false) - { - res = false; - } - else + if (z0) { uint64_t *outx = out; uint64_t *outy = out + 5U; @@ -571,6 +567,10 @@ bool Hacl_Impl_Ed25519_PointDecompress_point_decompress(uint64_t *out, uint8_t * fmul0(outt, x, y); res = true; } + else + { + res = false; + } bool res0 = res; return res0; } @@ -1150,11 +1150,7 @@ static inline bool gte_q(uint64_t *s) { return false; } - if (s3 > 0x00000000000000ULL) - { - return true; - } - if (s2 > 0x000000000014deULL) + if (s3 > 0x00000000000000ULL || s2 > 0x000000000014deULL) { return true; } @@ -1170,11 +1166,7 @@ static inline bool gte_q(uint64_t *s) { return false; } - if (s0 >= 0x12631a5cf5d3edULL) - { - return true; - } - return false; + return s0 >= 0x12631a5cf5d3edULL; } static inline bool eq(uint64_t *a, uint64_t *b) diff --git a/src/msvc/Hacl_FFDHE.c b/src/msvc/Hacl_FFDHE.c index a2cdfa52..9297c8b4 100644 --- a/src/msvc/Hacl_FFDHE.c +++ b/src/msvc/Hacl_FFDHE.c @@ -158,6 +158,7 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui uint64_t *p_n1 = (uint64_t *)alloca(nLen * sizeof (uint64_t)); memset(p_n1, 0U, nLen * sizeof (uint64_t)); uint64_t c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(0ULL, p_n[0U], 1ULL, p_n1); + uint64_t c1; if (1U < nLen) { uint64_t *a1 = p_n + 1U; @@ -184,13 +185,14 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t 
*pk_n, ui uint64_t *res_i = res1 + i; c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, 0ULL, res_i); } - uint64_t c1 = c; - KRML_MAYBE_UNUSED_VAR(c1); + uint64_t c10 = c; + c1 = c10; } else { - KRML_MAYBE_UNUSED_VAR(c0); + c1 = c0; } + KRML_MAYBE_UNUSED_VAR(c1); KRML_CHECK_SIZE(sizeof (uint64_t), nLen); uint64_t *b2 = (uint64_t *)alloca(nLen * sizeof (uint64_t)); memset(b2, 0U, nLen * sizeof (uint64_t)); @@ -202,7 +204,7 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui { uint64_t beq = FStar_UInt64_eq_mask(b2[i], pk_n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b2[i], pk_n[i]); - acc0 = (beq & acc0) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc0 = (beq & acc0) | (~beq & blt); } uint64_t res = acc0; uint64_t m0 = res; @@ -211,7 +213,7 @@ static inline uint64_t ffdhe_check_pk(Spec_FFDHE_ffdhe_alg a, uint64_t *pk_n, ui { uint64_t beq = FStar_UInt64_eq_mask(pk_n[i], p_n1[i]); uint64_t blt = ~FStar_UInt64_gte_mask(pk_n[i], p_n1[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t m1 = acc; return m0 & m1; diff --git a/src/msvc/Hacl_Frodo_KEM.c b/src/msvc/Hacl_Frodo_KEM.c index e0a65a47..f15d57ac 100644 --- a/src/msvc/Hacl_Frodo_KEM.c +++ b/src/msvc/Hacl_Frodo_KEM.c @@ -30,6 +30,7 @@ void randombytes_(uint32_t len, uint8_t *res) { - Lib_RandomBuffer_System_randombytes(res, len); + bool b = Lib_RandomBuffer_System_randombytes(res, len); + KRML_MAYBE_UNUSED_VAR(b); } diff --git a/src/msvc/Hacl_HMAC.c b/src/msvc/Hacl_HMAC.c index 63ab2032..d3f000b0 100644 --- a/src/msvc/Hacl_HMAC.c +++ b/src/msvc/Hacl_HMAC.c @@ -26,11 +26,123 @@ #include "internal/Hacl_HMAC.h" #include "internal/Hacl_Krmllib.h" +#include "internal/Hacl_Hash_SHA3.h" #include "internal/Hacl_Hash_SHA2.h" #include "internal/Hacl_Hash_SHA1.h" +#include "internal/Hacl_Hash_MD5.h" #include "internal/Hacl_Hash_Blake2s.h" #include "internal/Hacl_Hash_Blake2b.h" +/** +Write the 
HMAC-MD5 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 16 bytes of memory. +*/ +void +Hacl_HMAC_compute_md5( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 16U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_MD5_hash_oneshot(nkey, key, key_len); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t s[4U] = { 0x67452301U, 0xefcdab89U, 0x98badcfeU, 0x10325476U }; + uint8_t *dst1 = ipad; + if (data_len == 0U) + { + Hacl_Hash_MD5_update_last(s, 0ULL, ipad, 64U); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_MD5_update_multi(s, ipad, 
1U); + Hacl_Hash_MD5_update_multi(s, full_blocks, n_blocks); + Hacl_Hash_MD5_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + } + Hacl_Hash_MD5_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_MD5_init(s); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 16U / block_len; + uint32_t rem0 = 16U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 16U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_MD5_update_multi(s, opad, 1U); + Hacl_Hash_MD5_update_multi(s, full_blocks, n_blocks); + Hacl_Hash_MD5_update_last(s, (uint64_t)64U + (uint64_t)full_blocks_len, rem, rem_len); + Hacl_Hash_MD5_finish(s, dst); +} + /** Write the HMAC-SHA-1 MAC of a message (`data`) by using a key (`key`) into `dst`. 
@@ -46,10 +158,8 @@ Hacl_HMAC_compute_sha1( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -70,19 +180,17 @@ Hacl_HMAC_compute_sha1( { Hacl_Hash_SHA1_hash_oneshot(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -145,6 +253,130 @@ Hacl_HMAC_compute_sha1( Hacl_Hash_SHA1_finish(s, dst); } +/** +Write the HMAC-SHA-2-224 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 28 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_sha2_224( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 28U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA2_hash_224(nkey, key, key_len); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t st[8U] = { 0U }; + KRML_MAYBE_FOR8(i, + 0U, + 8U, + 1U, + uint32_t *os = st; + uint32_t x = Hacl_Hash_SHA2_h224[i]; + os[i] = x;); + uint32_t *s = st; + uint8_t *dst1 = ipad; + if (data_len == 0U) + { + Hacl_Hash_SHA2_sha224_update_last(0ULL + (uint64_t)64U, 64U, ipad, s); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA2_sha224_update_nblocks(64U, ipad, s); + Hacl_Hash_SHA2_sha224_update_nblocks(n_blocks * 64U, full_blocks, s); + 
Hacl_Hash_SHA2_sha224_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, + rem_len, + rem, + s); + } + Hacl_Hash_SHA2_sha224_finish(s, dst1); + uint8_t *hash1 = ipad; + Hacl_Hash_SHA2_sha224_init(s); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 28U / block_len; + uint32_t rem0 = 28U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 28U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA2_sha224_update_nblocks(64U, opad, s); + Hacl_Hash_SHA2_sha224_update_nblocks(n_blocks * 64U, full_blocks, s); + Hacl_Hash_SHA2_sha224_update_last((uint64_t)64U + (uint64_t)full_blocks_len + (uint64_t)rem_len, + rem_len, + rem, + s); + Hacl_Hash_SHA2_sha224_finish(s, dst); +} + /** Write the HMAC-SHA-2-256 MAC of a message (`data`) by using a key (`key`) into `dst`. 
@@ -160,10 +392,8 @@ Hacl_HMAC_compute_sha2_256( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -184,19 +414,17 @@ Hacl_HMAC_compute_sha2_256( { Hacl_Hash_SHA2_hash_256(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -288,10 +516,8 @@ Hacl_HMAC_compute_sha2_384( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -312,19 +538,17 @@ Hacl_HMAC_compute_sha2_384( { Hacl_Hash_SHA2_hash_384(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = 
key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -424,10 +648,8 @@ Hacl_HMAC_compute_sha2_512( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -448,19 +670,17 @@ Hacl_HMAC_compute_sha2_512( { Hacl_Hash_SHA2_hash_512(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -546,13 +766,13 @@ Hacl_HMAC_compute_sha2_512( } /** -Write the HMAC-BLAKE2s MAC of a message (`data`) by using a key (`key`) into `dst`. +Write the HMAC-SHA-3-224 MAC of a message (`data`) by using a key (`key`) into `dst`. -The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. -`dst` must point to 32 bytes of memory. 
+The key can be any length and will be hashed if it is longer and padded if it is shorter than 144 bytes. +`dst` must point to 28 bytes of memory. */ void -Hacl_HMAC_compute_blake2s_32( +Hacl_HMAC_compute_sha3_224( uint8_t *dst, uint8_t *key, uint32_t key_len, @@ -560,60 +780,53 @@ Hacl_HMAC_compute_blake2s_32( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[144U]; + memset(key_block, 0U, 144U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; - if (key_len <= 64U) + if (key_len <= 144U) { ite = key_len; } else { - ite = 32U; + ite = 28U; } uint8_t *zeroes = key_block + ite; KRML_MAYBE_UNUSED_VAR(zeroes); - if (key_len <= 64U) + if (key_len <= 144U) { memcpy(nkey, key, key_len * sizeof (uint8_t)); } else { - Hacl_Hash_Blake2s_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); + Hacl_Hash_SHA3_sha3_224(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[144U]; + memset(ipad, 0x36U, 144U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 144U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[144U]; + memset(opad, 0x5cU, 144U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 144U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } - uint32_t s[16U] = { 0U }; - Hacl_Hash_Blake2s_init(s, 0U, 32U); - uint32_t *s0 = s; + uint64_t s[25U] = { 0U }; uint8_t *dst1 = ipad; if (data_len == 0U) { - uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(64U, 
wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_224, s, ipad, 144U); } else { - uint32_t block_len = 64U; + uint32_t block_len = 144U; uint32_t n_blocks0 = data_len / block_len; uint32_t rem0 = data_len % block_len; K___uint32_t_uint32_t scrut; @@ -631,34 +844,29 @@ Hacl_HMAC_compute_blake2s_32( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = data; uint8_t *rem = data + full_blocks_len; - uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_multi(64U, wv, s0, 0ULL, ipad, 1U); - uint32_t wv0[16U] = { 0U }; - Hacl_Hash_Blake2s_update_multi(n_blocks * 64U, - wv0, - s0, - (uint64_t)block_len, - full_blocks, - n_blocks); - uint32_t wv1[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(rem_len, - wv1, - s0, - (uint64_t)64U + (uint64_t)full_blocks_len, - rem_len, - rem); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_224, s, rem, rem_len); } - Hacl_Hash_Blake2s_finish(32U, dst1, s0); + uint32_t remOut = 28U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 28U - remOut, hbuf0, remOut * sizeof (uint8_t)); uint8_t *hash1 = ipad; - Hacl_Hash_Blake2s_init(s0, 0U, 32U); - uint32_t block_len = 64U; - uint32_t n_blocks0 = 32U / block_len; - uint32_t rem0 = 32U % block_len; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 144U; + uint32_t n_blocks0 = 28U / block_len; + uint32_t rem0 = 28U % block_len; K___uint32_t_uint32_t scrut; if (n_blocks0 > 0U && rem0 == 0U) { uint32_t n_blocks_ = n_blocks0 - 1U; - scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 32U - n_blocks_ * block_len }); + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 28U - 
n_blocks_ * block_len }); } else { @@ -669,33 +877,28 @@ Hacl_HMAC_compute_blake2s_32( uint32_t full_blocks_len = n_blocks * block_len; uint8_t *full_blocks = hash1; uint8_t *rem = hash1 + full_blocks_len; - uint32_t wv[16U] = { 0U }; - Hacl_Hash_Blake2s_update_multi(64U, wv, s0, 0ULL, opad, 1U); - uint32_t wv0[16U] = { 0U }; - Hacl_Hash_Blake2s_update_multi(n_blocks * 64U, - wv0, - s0, - (uint64_t)block_len, - full_blocks, - n_blocks); - uint32_t wv1[16U] = { 0U }; - Hacl_Hash_Blake2s_update_last(rem_len, - wv1, - s0, - (uint64_t)64U + (uint64_t)full_blocks_len, - rem_len, - rem); - Hacl_Hash_Blake2s_finish(32U, dst, s0); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_224, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_224, s, rem, rem_len); + uint32_t remOut0 = 28U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 28U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); } /** -Write the HMAC-BLAKE2b MAC of a message (`data`) by using a key (`key`) into `dst`. +Write the HMAC-SHA-3-256 MAC of a message (`data`) by using a key (`key`) into `dst`. -The key can be any length and will be hashed if it is longer and padded if it is shorter than 128 bytes. -`dst` must point to 64 bytes of memory. +The key can be any length and will be hashed if it is longer and padded if it is shorter than 136 bytes. +`dst` must point to 32 bytes of memory. 
*/ void -Hacl_HMAC_compute_blake2b_32( +Hacl_HMAC_compute_sha3_256( uint8_t *dst, uint8_t *key, uint32_t key_len, @@ -703,56 +906,577 @@ Hacl_HMAC_compute_blake2b_32( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[136U]; + memset(key_block, 0U, 136U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; - if (key_len <= 128U) + if (key_len <= 136U) { ite = key_len; } else { - ite = 64U; + ite = 32U; } uint8_t *zeroes = key_block + ite; KRML_MAYBE_UNUSED_VAR(zeroes); - if (key_len <= 128U) + if (key_len <= 136U) { memcpy(nkey, key, key_len * sizeof (uint8_t)); } else { - Hacl_Hash_Blake2b_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); + Hacl_Hash_SHA3_sha3_256(nkey, key, key_len); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[136U]; + memset(ipad, 0x36U, 136U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 136U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[136U]; + memset(opad, 0x5cU, 136U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 136U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; opad[i] = (uint32_t)xi ^ (uint32_t)yi; } - uint64_t s[16U] = { 0U }; - Hacl_Hash_Blake2b_init(s, 0U, 64U); - uint64_t *s0 = s; + uint64_t s[25U] = { 0U }; uint8_t *dst1 = ipad; if (data_len == 0U) { - uint64_t wv[16U] = { 0U }; - Hacl_Hash_Blake2b_update_last(128U, wv, s0, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_256, s, ipad, 136U); + 
} + else + { + uint32_t block_len = 136U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_256, s, rem, rem_len); + } + uint32_t remOut = 32U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 32U - remOut, hbuf0, remOut * sizeof (uint8_t)); + uint8_t *hash1 = ipad; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 136U; + uint32_t n_blocks0 = 32U / block_len; + uint32_t rem0 = 32U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 32U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_256, s, full_blocks, n_blocks); + 
Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_256, s, rem, rem_len); + uint32_t remOut0 = 32U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 32U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); +} + +/** +Write the HMAC-SHA-3-384 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 104 bytes. +`dst` must point to 48 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_384( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[104U]; + memset(key_block, 0U, 104U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 104U) + { + ite = key_len; + } + else + { + ite = 48U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 104U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA3_sha3_384(nkey, key, key_len); + } + uint8_t ipad[104U]; + memset(ipad, 0x36U, 104U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 104U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[104U]; + memset(opad, 0x5cU, 104U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 104U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint64_t s[25U] = { 0U }; + uint8_t *dst1 = ipad; + if (data_len == 0U) + { + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_384, s, ipad, 104U); + } + else + { + uint32_t block_len = 104U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = 
((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_384, s, rem, rem_len); + } + uint32_t remOut = 48U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 48U - remOut, hbuf0, remOut * sizeof (uint8_t)); + uint8_t *hash1 = ipad; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 104U; + uint32_t n_blocks0 = 48U / block_len; + uint32_t rem0 = 48U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 48U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_384, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_384, s, rem, rem_len); + uint32_t remOut0 = 48U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + 
store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 48U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); +} + +/** +Write the HMAC-SHA-3-512 MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 72 bytes. +`dst` must point to 64 bytes of memory. +*/ +void +Hacl_HMAC_compute_sha3_512( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[72U]; + memset(key_block, 0U, 72U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 72U) + { + ite = key_len; + } + else + { + ite = 64U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 72U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_SHA3_sha3_512(nkey, key, key_len); + } + uint8_t ipad[72U]; + memset(ipad, 0x36U, 72U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 72U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[72U]; + memset(opad, 0x5cU, 72U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 72U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint64_t s[25U] = { 0U }; + uint8_t *dst1 = ipad; + if (data_len == 0U) + { + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_512, s, ipad, 72U); + } + else + { + uint32_t block_len = 72U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * 
block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, ipad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_512, s, rem, rem_len); + } + uint32_t remOut = 64U; + uint8_t hbuf0[256U] = { 0U }; + uint64_t ws0[32U] = { 0U }; + memcpy(ws0, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf0 + i * 8U, ws0[i]); + } + memcpy(dst1 + 64U - remOut, hbuf0, remOut * sizeof (uint8_t)); + uint8_t *hash1 = ipad; + memset(s, 0U, 25U * sizeof (uint64_t)); + uint32_t block_len = 72U; + uint32_t n_blocks0 = 64U / block_len; + uint32_t rem0 = 64U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 64U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, opad, 1U); + Hacl_Hash_SHA3_update_multi_sha3(Spec_Hash_Definitions_SHA3_512, s, full_blocks, n_blocks); + Hacl_Hash_SHA3_update_last_sha3(Spec_Hash_Definitions_SHA3_512, s, rem, rem_len); + uint32_t remOut0 = 64U; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(dst + 64U - remOut0, hbuf, remOut0 * sizeof (uint8_t)); +} + +/** +Write the HMAC-BLAKE2s MAC of a message (`data`) by using a key (`key`) into `dst`. 
+ +The key can be any length and will be hashed if it is longer and padded if it is shorter than 64 bytes. +`dst` must point to 32 bytes of memory. +*/ +void +Hacl_HMAC_compute_blake2s_32( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 64U) + { + ite = key_len; + } + else + { + ite = 32U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 64U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_Blake2s_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); + } + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint32_t s[16U] = { 0U }; + Hacl_Hash_Blake2s_init(s, 0U, 32U); + uint32_t *s0 = s; + uint8_t *dst1 = ipad; + if (data_len == 0U) + { + uint32_t wv[16U] = { 0U }; + Hacl_Hash_Blake2s_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); + } + else + { + uint32_t block_len = 64U; + uint32_t n_blocks0 = data_len / block_len; + uint32_t rem0 = data_len % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = data_len - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = data; + uint8_t *rem = data + full_blocks_len; + uint32_t wv[16U] = { 
0U }; + Hacl_Hash_Blake2s_update_multi(64U, wv, s0, 0ULL, ipad, 1U); + uint32_t wv0[16U] = { 0U }; + Hacl_Hash_Blake2s_update_multi(n_blocks * 64U, + wv0, + s0, + (uint64_t)block_len, + full_blocks, + n_blocks); + uint32_t wv1[16U] = { 0U }; + Hacl_Hash_Blake2s_update_last(rem_len, + wv1, + s0, + false, + (uint64_t)64U + (uint64_t)full_blocks_len, + rem_len, + rem); + } + Hacl_Hash_Blake2s_finish(32U, dst1, s0); + uint8_t *hash1 = ipad; + Hacl_Hash_Blake2s_init(s0, 0U, 32U); + uint32_t block_len = 64U; + uint32_t n_blocks0 = 32U / block_len; + uint32_t rem0 = 32U % block_len; + K___uint32_t_uint32_t scrut; + if (n_blocks0 > 0U && rem0 == 0U) + { + uint32_t n_blocks_ = n_blocks0 - 1U; + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks_, .snd = 32U - n_blocks_ * block_len }); + } + else + { + scrut = ((K___uint32_t_uint32_t){ .fst = n_blocks0, .snd = rem0 }); + } + uint32_t n_blocks = scrut.fst; + uint32_t rem_len = scrut.snd; + uint32_t full_blocks_len = n_blocks * block_len; + uint8_t *full_blocks = hash1; + uint8_t *rem = hash1 + full_blocks_len; + uint32_t wv[16U] = { 0U }; + Hacl_Hash_Blake2s_update_multi(64U, wv, s0, 0ULL, opad, 1U); + uint32_t wv0[16U] = { 0U }; + Hacl_Hash_Blake2s_update_multi(n_blocks * 64U, + wv0, + s0, + (uint64_t)block_len, + full_blocks, + n_blocks); + uint32_t wv1[16U] = { 0U }; + Hacl_Hash_Blake2s_update_last(rem_len, + wv1, + s0, + false, + (uint64_t)64U + (uint64_t)full_blocks_len, + rem_len, + rem); + Hacl_Hash_Blake2s_finish(32U, dst, s0); +} + +/** +Write the HMAC-BLAKE2b MAC of a message (`data`) by using a key (`key`) into `dst`. + +The key can be any length and will be hashed if it is longer and padded if it is shorter than 128 bytes. +`dst` must point to 64 bytes of memory. 
+*/ +void +Hacl_HMAC_compute_blake2b_32( + uint8_t *dst, + uint8_t *key, + uint32_t key_len, + uint8_t *data, + uint32_t data_len +) +{ + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); + uint8_t *nkey = key_block; + uint32_t ite; + if (key_len <= 128U) + { + ite = key_len; + } + else + { + ite = 64U; + } + uint8_t *zeroes = key_block + ite; + KRML_MAYBE_UNUSED_VAR(zeroes); + if (key_len <= 128U) + { + memcpy(nkey, key, key_len * sizeof (uint8_t)); + } + else + { + Hacl_Hash_Blake2b_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); + } + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) + { + uint8_t xi = ipad[i]; + uint8_t yi = key_block[i]; + ipad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) + { + uint8_t xi = opad[i]; + uint8_t yi = key_block[i]; + opad[i] = (uint32_t)xi ^ (uint32_t)yi; + } + uint64_t s[16U] = { 0U }; + Hacl_Hash_Blake2b_init(s, 0U, 64U); + uint64_t *s0 = s; + uint8_t *dst1 = ipad; + if (data_len == 0U) + { + uint64_t wv[16U] = { 0U }; + Hacl_Hash_Blake2b_update_last(128U, + wv, + s0, + false, + FStar_UInt128_uint64_to_uint128(0ULL), + 128U, + ipad); } else { @@ -787,6 +1511,7 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, @@ -826,6 +1551,7 @@ Hacl_HMAC_compute_blake2b_32( Hacl_Hash_Blake2b_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/msvc/Hacl_HMAC_Blake2b_256.c b/src/msvc/Hacl_HMAC_Blake2b_256.c index cd16e65e..ca0ec144 100644 --- a/src/msvc/Hacl_HMAC_Blake2b_256.c +++ b/src/msvc/Hacl_HMAC_Blake2b_256.c @@ -44,10 +44,8 @@ 
Hacl_HMAC_Blake2b_256_compute_blake2b_256( uint32_t data_len ) { - uint32_t l = 128U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[128U]; + memset(key_block, 0U, 128U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 128U) @@ -68,19 +66,17 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( { Hacl_Hash_Blake2b_Simd256_hash_with_key(nkey, 64U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[128U]; + memset(ipad, 0x36U, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[128U]; + memset(opad, 0x5cU, 128U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 128U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -96,6 +92,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(128U, wv, s0, + false, FStar_UInt128_uint64_to_uint128(0ULL), 128U, ipad); @@ -138,6 +135,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, @@ -182,6 +180,7 @@ Hacl_HMAC_Blake2b_256_compute_blake2b_256( Hacl_Hash_Blake2b_Simd256_update_last(rem_len, wv1, s0, + false, FStar_UInt128_add(FStar_UInt128_uint64_to_uint128((uint64_t)128U), FStar_UInt128_uint64_to_uint128((uint64_t)full_blocks_len)), rem_len, diff --git a/src/msvc/Hacl_HMAC_Blake2s_128.c 
b/src/msvc/Hacl_HMAC_Blake2s_128.c index bf2033a8..3f0c333d 100644 --- a/src/msvc/Hacl_HMAC_Blake2s_128.c +++ b/src/msvc/Hacl_HMAC_Blake2s_128.c @@ -43,10 +43,8 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( uint32_t data_len ) { - uint32_t l = 64U; - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *key_block = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(key_block, 0U, l * sizeof (uint8_t)); + uint8_t key_block[64U]; + memset(key_block, 0U, 64U * sizeof (uint8_t)); uint8_t *nkey = key_block; uint32_t ite; if (key_len <= 64U) @@ -67,19 +65,17 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( { Hacl_Hash_Blake2s_Simd128_hash_with_key(nkey, 32U, key, key_len, NULL, 0U); } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *ipad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(ipad, 0x36U, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t ipad[64U]; + memset(ipad, 0x36U, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = ipad[i]; uint8_t yi = key_block[i]; ipad[i] = (uint32_t)xi ^ (uint32_t)yi; } - KRML_CHECK_SIZE(sizeof (uint8_t), l); - uint8_t *opad = (uint8_t *)alloca(l * sizeof (uint8_t)); - memset(opad, 0x5cU, l * sizeof (uint8_t)); - for (uint32_t i = 0U; i < l; i++) + uint8_t opad[64U]; + memset(opad, 0x5cU, 64U * sizeof (uint8_t)); + for (uint32_t i = 0U; i < 64U; i++) { uint8_t xi = opad[i]; uint8_t yi = key_block[i]; @@ -92,7 +88,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( if (data_len == 0U) { KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv[4U] KRML_POST_ALIGN(16) = { 0U }; - Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, 0ULL, 64U, ipad); + Hacl_Hash_Blake2s_Simd128_update_last(64U, wv, s0, false, 0ULL, 64U, ipad); } else { @@ -127,6 +123,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); @@ -165,6 +162,7 @@ Hacl_HMAC_Blake2s_128_compute_blake2s_128( 
Hacl_Hash_Blake2s_Simd128_update_last(rem_len, wv1, s0, + false, (uint64_t)64U + (uint64_t)full_blocks_len, rem_len, rem); diff --git a/src/msvc/Hacl_Hash_Blake2b.c b/src/msvc/Hacl_Hash_Blake2b.c index d490a1a5..1bab75e6 100644 --- a/src/msvc/Hacl_Hash_Blake2b.c +++ b/src/msvc/Hacl_Hash_Blake2b.c @@ -29,7 +29,14 @@ #include "lib_memzero0.h" static void -update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totlen, uint8_t *d) +update_block( + uint64_t *wv, + uint64_t *hash, + bool flag, + bool last_node, + FStar_UInt128_uint128 totlen, + uint8_t *d +) { uint64_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, @@ -52,7 +59,15 @@ update_block(uint64_t *wv, uint64_t *hash, bool flag, FStar_UInt128_uint128 totl { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask[0U] = FStar_UInt128_uint128_to_uint64(totlen); mask[1U] = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)); mask[2U] = wv_14; @@ -560,86 +575,6 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - uint64_t *r0 = hash; - uint64_t *r1 = hash + 4U; - uint64_t *r2 = hash + 8U; - uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - 
uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); @@ -647,11 +582,11 @@ static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -674,7 +609,7 @@ Hacl_Hash_Blake2b_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, 
false, totlen, b); } } @@ -683,6 +618,7 @@ Hacl_Hash_Blake2b_update_last( uint32_t len, uint64_t *wv, uint64_t *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -693,7 +629,7 @@ Hacl_Hash_Blake2b_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -727,7 +663,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -762,16 +698,19 @@ void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) } static Hacl_Hash_Blake2b_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -790,17 +729,94 @@ static Hacl_Hash_Blake2b_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i.key_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index 
i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t 
tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -820,14 +836,16 @@ The caller must satisfy the following requirements. */ Hacl_Hash_Blake2b_state_t -*Hacl_Hash_Blake2b_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -844,7 +862,7 @@ The caller must satisfy the following requirements. 
Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; uint8_t salt[16U] = { 0U }; uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params @@ -855,7 +873,7 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, k); + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, false, k); return s; } @@ -872,38 +890,116 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) { Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2b_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - 
Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i1.key_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + 
tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -915,13 +1011,13 @@ reset_raw( ite = 0U; } Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** General-purpose re-initialization function with parameters and -key. You cannot change digest_length or key_length, meaning those values in +key. You cannot change digest_length, key_length, or last_node, meaning those values in the parameters object must be the same as originally decided via one of the malloc functions. All other values of the parameter can be changed. The behavior is unspecified if you violate this precondition. 
@@ -933,8 +1029,9 @@ Hacl_Hash_Blake2b_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** @@ -957,7 +1054,7 @@ void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** @@ -1040,7 +1137,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = 1U; @@ -1065,7 +1162,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1133,7 +1230,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = 1U; @@ -1159,7 +1256,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - 
K____uint64_t___uint64_t_ acc = block_state1.thd; + K____uint64_t___uint64_t_ acc = block_state1.f3; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1190,16 +1287,20 @@ at least `digest_length` bytes, where `digest_length` was determined by your choice of `malloc` function. Concretely, if you used `malloc` or `malloc_with_key`, then the expected length is 32 for S, or 64 for B (default digest length). If you used `malloc_with_params_and_key`, then the expected -length is whatever you chose for the `digest_length` field of your -parameters. +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_state_t scrut = *state; + Hacl_Hash_Blake2b_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_state_t scrut = *s; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1217,9 +1318,14 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) uint64_t b[16U] = { 0U }; Hacl_Hash_Blake2b_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } 
}; - uint64_t *src_b = block_state.thd.snd; - uint64_t *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint64_t *src_b = block_state.f3.snd; + uint64_t *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1233,7 +1339,7 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - K____uint64_t___uint64_t_ acc0 = tmp_block_state.thd; + K____uint64_t___uint64_t_ acc0 = tmp_block_state.f3; uint64_t *wv1 = acc0.fst; uint64_t *hash0 = acc0.snd; uint32_t nb = 0U; @@ -1244,17 +1350,35 @@ void Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *state, uint8_t *output) buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; - K____uint64_t___uint64_t_ acc = tmp_block_state.thd; + K____uint64_t___uint64_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; uint64_t *wv = acc.fst; uint64_t *hash = acc.snd; Hacl_Hash_Blake2b_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = 
block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1265,8 +1389,8 @@ void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) Hacl_Hash_Blake2b_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; - uint64_t *b = block_state.thd.snd; - uint64_t *wv = block_state.thd.fst; + uint64_t *b = block_state.f3.snd; + uint64_t *wv = block_state.f3.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1282,17 +1406,24 @@ Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *sta Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); Hacl_Hash_Blake2b_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint64_t *src_b = block_state0.thd.snd; - uint64_t *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint64_t *src_b = block_state0.f3.snd; + uint64_t *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); Hacl_Hash_Blake2b_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1335,10 +1466,10 @@ Hacl_Hash_Blake2b_hash_with_key( Write the BLAKE2b digest of message `input` using key `key` and 
parameters `params` into `output`. The `key` array must be of length `params.key_length`. The `output` array must be of length -`params.digest_length`. +`params.digest_length`. */ void -Hacl_Hash_Blake2b_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_Blake2b_Simd256.c b/src/msvc/Hacl_Hash_Blake2b_Simd256.c index 0afd93bc..19234ab9 100644 --- a/src/msvc/Hacl_Hash_Blake2b_Simd256.c +++ b/src/msvc/Hacl_Hash_Blake2b_Simd256.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, bool flag, + bool last_node, FStar_UInt128_uint128 totlen, uint8_t *d ) @@ -59,7 +60,15 @@ update_block( { wv_14 = 0ULL; } - uint64_t wv_15 = 0ULL; + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } mask = Lib_IntVector_Intrinsics_vec256_load64s(FStar_UInt128_uint128_to_uint64(totlen), FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)), @@ -289,75 +298,6 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec256 *r0 = hash; - Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = 
Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec256 *wv, @@ -372,11 +312,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -399,7 +339,7 @@ Hacl_Hash_Blake2b_Simd256_update_multi( FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); uint8_t *b = blocks + i * 128U; 
- update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -408,6 +348,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec256 *wv, Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, FStar_UInt128_uint128 prev, uint32_t rem, uint8_t *d @@ -418,7 +359,7 @@ Hacl_Hash_Blake2b_Simd256_update_last( memcpy(b, last, rem * sizeof (uint8_t)); FStar_UInt128_uint128 totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 128U, uint8_t, void *); } @@ -452,7 +393,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2b_Simd256_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -593,10 +534,7 @@ Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void) } static Hacl_Hash_Blake2b_Simd256_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -610,7 +548,13 @@ static Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -632,52 +576,131 @@ static Hacl_Hash_Blake2b_Simd256_state_t 
Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i.key_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + 
uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (256 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. 
+ */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (256 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. 
+ */ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 64U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(16U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -685,21 +708,16 @@ Hacl_Hash_Blake2b_Simd256_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2b_Simd256_state_t - *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. 
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) { @@ -709,38 +727,105 @@ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) { Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2b_Simd256_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; - uint32_t kk2 = (uint32_t)i1.key_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 
0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = 
iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -752,14 +837,16 @@ reset_raw( ite = 0U; } Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( @@ -768,15 +855,17 @@ Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. 
All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) { @@ -791,11 +880,16 @@ void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) { @@ -803,7 +897,7 @@ void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2b_Simd256_update( @@ -873,8 +967,7 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; @@ -899,7 +992,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -967,8 +1060,7 @@ Hacl_Hash_Blake2b_Simd256_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = 1U; @@ -994,7 +1086,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.thd; + 
K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; uint32_t nb = data1_len / 128U; @@ -1020,16 +1112,25 @@ Hacl_Hash_Blake2b_Simd256_update( } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_Simd256_state_t scrut = *s; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1047,9 +1148,14 @@ 
Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Hash_Blake2b_Simd256_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1064,7 +1170,7 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc0 = tmp_block_state.thd; + acc0 = tmp_block_state.f3; Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; uint32_t nb = 0U; @@ -1076,17 +1182,35 @@ Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *state, uint8 nb); uint64_t prev_len_last = total_len - (uint64_t)r; K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ - acc = tmp_block_state.thd; + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; Hacl_Hash_Blake2b_Simd256_update_last(r, wv, hash, + last_node1, FStar_UInt128_uint64_to_uint128(prev_len_last), r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn1, dst, 
tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1097,8 +1221,8 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec256 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec256 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec256 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1106,7 +1230,7 @@ void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. 
*/ Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) @@ -1115,9 +1239,10 @@ Hacl_Hash_Blake2b_Simd256_state_t Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); memcpy(buf, buf0, 128U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec256 @@ -1131,9 +1256,15 @@ Hacl_Hash_Blake2b_Simd256_state_t sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); Hacl_Hash_Blake2b_Simd256_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1175,8 +1306,14 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); } +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2b_Simd256_hash_with_key_and_paramas( +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_Blake2s.c b/src/msvc/Hacl_Hash_Blake2s.c index 6e19d83d..ceb73850 100644 --- a/src/msvc/Hacl_Hash_Blake2s.c +++ b/src/msvc/Hacl_Hash_Blake2s.c @@ -30,7 +30,14 @@ #include "lib_memzero0.h" static inline void -update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t *d) +update_block( + uint32_t *wv, + uint32_t *hash, + bool flag, + bool last_node, + uint64_t totlen, + uint8_t *d +) { uint32_t m_w[16U] = { 0U }; KRML_MAYBE_FOR16(i, @@ -53,7 +60,15 @@ update_block(uint32_t *wv, uint32_t *hash, bool flag, uint64_t totlen, uint8_t * { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask[0U] = (uint32_t)totlen; mask[1U] = (uint32_t)(totlen >> 32U); mask[2U] = wv_14; @@ -558,83 +573,6 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - uint32_t *r0 = hash; - uint32_t *r1 = hash + 4U; - uint32_t *r2 = hash + 8U; - uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - 
os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { uint64_t lb = (uint64_t)64U; @@ -642,11 +580,11 @@ static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, ui memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -666,7 +604,7 @@ Hacl_Hash_Blake2s_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -675,6 +613,7 @@ Hacl_Hash_Blake2s_update_last( uint32_t len, uint32_t *wv, uint32_t *hash, + bool last_node, uint64_t prev, uint32_t rem, 
uint8_t *d @@ -684,7 +623,7 @@ Hacl_Hash_Blake2s_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -712,7 +651,7 @@ update_blocks(uint32_t len, uint32_t *wv, uint32_t *hash, uint64_t prev, uint8_t rem = rem0; } Hacl_Hash_Blake2s_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -747,16 +686,19 @@ void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash) } static Hacl_Hash_Blake2s_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -775,7 +717,9 @@ static Hacl_Hash_Blake2s_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ 
-785,38 +729,127 @@ static Hacl_Hash_Blake2s_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + 
r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (32 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t -*Hacl_Hash_Blake2s_malloc_with_params_and_key(Hacl_Hash_Blake2b_blake2_params *p, uint8_t *k) +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. 
All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (32 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -824,20 +857,15 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; - Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, 
and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) { @@ -847,28 +875,31 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) { Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } -static void -reset_raw( - Hacl_Hash_Blake2s_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -878,7 +909,79 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + uint32_t 
tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -890,14 
+993,16 @@ reset_raw( ite = 0U; } Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset_with_key_and_params( @@ -906,15 +1011,17 @@ Hacl_Hash_Blake2s_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. 
*/ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) { @@ -929,11 +1036,16 @@ void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) { @@ -941,7 +1053,7 @@ void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len) @@ -1007,7 +1119,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = 1U; @@ -1027,7 +1139,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = 
acc.fst; uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -1090,7 +1202,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = 1U; @@ -1111,7 +1223,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____uint32_t___uint32_t_ acc = block_state1.thd; + K____uint32_t___uint32_t_ acc = block_state1.f3; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -1132,15 +1244,25 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. 
*/ -void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_state_t scrut = *state; + Hacl_Hash_Blake2s_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_state_t scrut = *s; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1158,9 +1280,14 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) uint32_t b[16U] = { 0U }; Hacl_Hash_Blake2s_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - uint32_t *src_b = block_state.thd.snd; - uint32_t *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint32_t *src_b = block_state.f3.snd; + uint32_t *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1174,18 +1301,35 @@ void Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *state, uint8_t *output) } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - K____uint32_t___uint32_t_ acc0 = tmp_block_state.thd; + K____uint32_t___uint32_t_ acc0 = tmp_block_state.f3; uint32_t *wv1 = acc0.fst; uint32_t *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - 
(uint64_t)r; - K____uint32_t___uint32_t_ acc = tmp_block_state.thd; + K____uint32_t___uint32_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; uint32_t *wv = acc.fst; uint32_t *hash = acc.snd; - Hacl_Hash_Blake2s_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Hacl_Hash_Blake2s_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1196,8 +1340,8 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) Hacl_Hash_Blake2s_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; - uint32_t *b = block_state.thd.snd; - uint32_t *wv = block_state.thd.fst; + uint32_t *b = block_state.f3.snd; + uint32_t *wv = block_state.f3.fst; KRML_HOST_FREE(wv); KRML_HOST_FREE(b); KRML_HOST_FREE(buf); @@ -1205,7 +1349,7 @@ void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. 
*/ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) { @@ -1213,17 +1357,24 @@ Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *sta Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); Hacl_Hash_Blake2s_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - uint32_t *src_b = block_state0.thd.snd; - uint32_t *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint32_t *src_b = block_state0.f3.snd; + uint32_t *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); Hacl_Hash_Blake2s_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1262,8 +1413,14 @@ Hacl_Hash_Blake2s_hash_with_key( Lib_Memzero0_memzero(b, 16U, uint32_t, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. 
+*/ void -Hacl_Hash_Blake2s_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_Blake2s_Simd128.c b/src/msvc/Hacl_Hash_Blake2s_Simd128.c index c02da8fa..3b68783b 100644 --- a/src/msvc/Hacl_Hash_Blake2s_Simd128.c +++ b/src/msvc/Hacl_Hash_Blake2s_Simd128.c @@ -34,6 +34,7 @@ update_block( Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, bool flag, + bool last_node, uint64_t totlen, uint8_t *d ) @@ -59,7 +60,15 @@ update_block( { wv_14 = 0U; } - uint32_t wv_15 = 0U; + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } mask = Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)totlen, (uint32_t)(totlen >> 32U), @@ -286,72 +295,6 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec128 *r0 = hash; - Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - 
uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec128 *wv, @@ -366,11 +309,11 @@ update_key( memcpy(b, k, kk * sizeof (uint8_t)); if (ll == 0U) { - update_block(wv, hash, true, lb, b); + update_block(wv, hash, true, false, lb, b); } else { - update_block(wv, hash, false, lb, b); + update_block(wv, hash, false, false, lb, b); } Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -390,7 +333,7 @@ Hacl_Hash_Blake2s_Simd128_update_multi( { uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); uint8_t *b = blocks + i * 64U; - update_block(wv, hash, false, totlen, b); + update_block(wv, hash, false, false, totlen, b); } } @@ -399,6 +342,7 @@ Hacl_Hash_Blake2s_Simd128_update_last( uint32_t len, Lib_IntVector_Intrinsics_vec128 *wv, Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, uint64_t prev, uint32_t rem, uint8_t *d @@ -408,7 +352,7 @@ 
Hacl_Hash_Blake2s_Simd128_update_last( uint8_t *last = d + len - rem; memcpy(b, last, rem * sizeof (uint8_t)); uint64_t totlen = prev + (uint64_t)len; - update_block(wv, hash, true, totlen, b); + update_block(wv, hash, true, last_node, totlen, b); Lib_Memzero0_memzero(b, 64U, uint8_t, void *); } @@ -442,7 +386,7 @@ update_blocks( rem = rem0; } Hacl_Hash_Blake2s_Simd128_update_multi(len, wv, hash, prev, blocks, nb); - Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, prev, rem, blocks); + Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, false, prev, rem, blocks); } static inline void @@ -583,10 +527,7 @@ Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) } static Hacl_Hash_Blake2s_Simd128_state_t -*malloc_raw( - Hacl_Hash_Blake2b_index kk, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) { uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -600,7 +541,13 @@ static Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = kk.key_length, .snd = kk.digest_length, .thd = { .fst = wv, .snd = b } }; + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; uint8_t kk10 = kk.key_length; uint32_t ite; if (kk10 != 0U) @@ -622,7 +569,9 @@ static Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; uint8_t kk1 = p1->key_length; uint8_t nn = p1->digest_length; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = 
(uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -632,42 +581,116 @@ static Hacl_Hash_Blake2s_Simd128_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.thd.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; 
+ uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); return p; } /** - State allocation function when there are parameters and a key. The -length of the key k MUST match the value of the field key_length in the -parameters. Furthermore, there is a static (not dynamically checked) requirement -that key_length does not exceed max_key (128 for S, 64 for B).) + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, uint8_t *k ) { Hacl_Hash_Blake2b_blake2_params pv = p[0U]; Hacl_Hash_Blake2b_index - i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length }; - return - malloc_raw(i1, - ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - State allocation function when there is just a custom key. 
All -other parameters are set to their respective default values, meaning the output -length is the maximum allowed output (128 for S, 64 for B). + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) { uint8_t nn = 32U; - Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn }; - uint8_t *salt = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); - uint8_t *personal = (uint8_t *)KRML_HOST_CALLOC(8U, sizeof (uint8_t)); + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; Hacl_Hash_Blake2b_blake2_params p = { @@ -675,21 +698,16 @@ Hacl_Hash_Blake2s_Simd128_state_t .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal }; - Hacl_Hash_Blake2b_blake2_params - *p0 = - (Hacl_Hash_Blake2b_blake2_params *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_blake2_params)); - p0[0U] = p; + Hacl_Hash_Blake2b_blake2_params p0 = p; Hacl_Hash_Blake2s_Simd128_state_t - *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(p0, k); - Hacl_Hash_Blake2b_blake2_params p1 = p0[0U]; - KRML_HOST_FREE(p1.salt); - KRML_HOST_FREE(p1.personal); - KRML_HOST_FREE(p0); + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&p0, false, k); return s; } /** - State allocation function when there is no key + Specialized allocation function that picks default values for all +parameters, and has no key. 
Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) { @@ -699,28 +717,32 @@ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) { Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; uint8_t nn = block_state.snd; uint8_t kk1 = block_state.fst; - return ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn }); + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); } static void -reset_raw( - Hacl_Hash_Blake2s_Simd128_state_t *state, - K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_ key -) +reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and_key key) { Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; uint8_t nn0 = block_state.snd; uint8_t kk10 = block_state.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk10, .digest_length = nn0 }; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; KRML_MAYBE_UNUSED_VAR(i); Hacl_Hash_Blake2b_blake2_params *p = key.fst; uint8_t kk1 = p->key_length; uint8_t nn = p->digest_length; - Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn }; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -730,7 +752,67 @@ reset_raw( memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } 
Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.thd.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ 
tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -742,14 +824,16 @@ reset_raw( ite = 0U; } Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** - Re-initialization function. The reinitialization API is tricky -- -you MUST reuse the same original parameters for digest (output) length and key -length. + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( @@ -758,15 +842,17 @@ Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( uint8_t *k ) { - index_of_state(s); - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = p, .snd = k })); + Hacl_Hash_Blake2b_index i1 = index_of_state(s); + KRML_MAYBE_UNUSED_VAR(i1); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); } /** - Re-initialization function when there is a key. Note that the key -size is not allowed to change, which is why this function does not take a key -length -- the key has to be same key size that was originally passed to -`malloc_with_key` + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. 
The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k) { @@ -781,11 +867,16 @@ void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t .personal = personal }; Hacl_Hash_Blake2b_blake2_params p0 = p; - reset_raw(s, ((K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_){ .fst = &p0, .snd = k })); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); } /** - Re-initialization function when there is no key + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. */ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) { @@ -793,7 +884,7 @@ void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) } /** - Update function when there is no key; 0 = success, 1 = max length exceeded + Update function; 0 = success, 1 = max length exceeded */ Hacl_Streaming_Types_error_code Hacl_Hash_Blake2s_Simd128_update( @@ -863,8 +954,7 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; @@ -884,7 +974,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - data1_len; uint8_t *data1 = chunk; uint8_t *data2 = chunk + data1_len; - 
K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -947,8 +1037,7 @@ Hacl_Hash_Blake2s_Simd128_update( if (!(sz1 == 0U)) { uint64_t prevlen = total_len1 - (uint64_t)sz1; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = 1U; @@ -969,7 +1058,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint32_t data2_len = chunk_len - diff - data1_len; uint8_t *data1 = chunk2; uint8_t *data2 = chunk2 + data1_len; - K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; uint32_t nb = data1_len / 64U; @@ -990,16 +1079,25 @@ Hacl_Hash_Blake2s_Simd128_update( } /** - Finish function when there is no key + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. 
When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. */ -void -Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8_t *output) +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst) { - Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*state).block_state; - uint8_t nn = block_state0.snd; - uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; - Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_Simd128_state_t scrut = *s; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; uint8_t *buf_ = scrut.buf; uint64_t total_len = scrut.total_len; @@ -1017,9 +1115,14 @@ Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Hash_Blake2s_Simd128_block_state_t tmp_block_state = - { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv0, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.thd.snd; + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); uint64_t prev_len = total_len - (uint64_t)r; uint32_t ite; @@ -1034,19 +1137,36 @@ 
Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *state, uint8 uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc0 = tmp_block_state.thd; + acc0 = tmp_block_state.f3; Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; uint32_t nb = 0U; Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); uint64_t prev_len_last = total_len - (uint64_t)r; K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ - acc = tmp_block_state.thd; + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; - Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, prev_len_last, r, buf_last); - uint8_t nn0 = tmp_block_state.snd; - Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn0, output, tmp_block_state.thd.snd); + Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); } /** @@ -1057,8 +1177,8 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) 
Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; uint8_t *buf = scrut.buf; Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; - Lib_IntVector_Intrinsics_vec128 *b = block_state.thd.snd; - Lib_IntVector_Intrinsics_vec128 *wv = block_state.thd.fst; + Lib_IntVector_Intrinsics_vec128 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3.fst; KRML_ALIGNED_FREE(wv); KRML_ALIGNED_FREE(b); KRML_HOST_FREE(buf); @@ -1066,7 +1186,7 @@ void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) } /** - Copying. The key length (or absence thereof) must match between source and destination. + Copying. This preserves all parameters. */ Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state) @@ -1075,9 +1195,10 @@ Hacl_Hash_Blake2s_Simd128_state_t Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state; uint8_t *buf0 = scrut.buf; uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; uint8_t nn = block_state0.snd; uint8_t kk1 = block_state0.fst; - Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn }; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); memcpy(buf, buf0, 64U * sizeof (uint8_t)); Lib_IntVector_Intrinsics_vec128 @@ -1091,9 +1212,15 @@ Hacl_Hash_Blake2s_Simd128_state_t sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_block_state_t - block_state = { .fst = i.key_length, .snd = i.digest_length, .thd = { .fst = wv, .snd = b } }; - Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.thd.snd; - Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.thd.snd; + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + 
Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.f3.snd; memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); Hacl_Hash_Blake2s_Simd128_state_t s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; @@ -1135,8 +1262,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key( Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); } +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ void -Hacl_Hash_Blake2s_Simd128_hash_with_key_and_paramas( +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint8_t *output, uint8_t *input, uint32_t input_len, diff --git a/src/msvc/Hacl_Hash_SHA2.c b/src/msvc/Hacl_Hash_SHA2.c index 995fe707..a9a6f452 100644 --- a/src/msvc/Hacl_Hash_SHA2.c +++ b/src/msvc/Hacl_Hash_SHA2.c @@ -211,7 +211,7 @@ void Hacl_Hash_SHA2_sha224_init(uint32_t *hash) os[i] = x;); } -static inline void sha224_update_nblocks(uint32_t len, uint8_t *b, uint32_t *st) +void Hacl_Hash_SHA2_sha224_update_nblocks(uint32_t len, uint8_t *b, uint32_t *st) { Hacl_Hash_SHA2_sha256_update_nblocks(len, b, st); } @@ -825,7 +825,7 @@ void Hacl_Hash_SHA2_digest_224(Hacl_Streaming_MD_state_32 *state, uint8_t *outpu } uint8_t *buf_last = buf_1 + r - ite; uint8_t *buf_multi = buf_1; - sha224_update_nblocks(0U, buf_multi, tmp_block_state); + Hacl_Hash_SHA2_sha224_update_nblocks(0U, buf_multi, tmp_block_state); uint64_t prev_len_last = total_len - (uint64_t)r; Hacl_Hash_SHA2_sha224_update_last(prev_len_last + (uint64_t)r, r, buf_last, tmp_block_state); Hacl_Hash_SHA2_sha224_finish(tmp_block_state, output); @@ -847,7 +847,7 @@ void Hacl_Hash_SHA2_hash_224(uint8_t *output, uint8_t *input, uint32_t input_len Hacl_Hash_SHA2_sha224_init(st); uint32_t rem = input_len % 64U; uint64_t len_ = (uint64_t)input_len; - 
sha224_update_nblocks(input_len, ib, st); + Hacl_Hash_SHA2_sha224_update_nblocks(input_len, ib, st); uint32_t rem1 = input_len % 64U; uint8_t *b0 = ib; uint8_t *lb = b0 + input_len - rem1; diff --git a/src/msvc/Hacl_Hash_SHA3.c b/src/msvc/Hacl_Hash_SHA3.c index 89bb0491..b964e1d9 100644 --- a/src/msvc/Hacl_Hash_SHA3.c +++ b/src/msvc/Hacl_Hash_SHA3.c @@ -251,7 +251,8 @@ Hacl_Hash_SHA3_update_multi_sha3( uint8_t *bl0 = b_; uint8_t *uu____0 = b0 + i * block_len(a); memcpy(bl0, uu____0, block_len(a) * sizeof (uint8_t)); - block_len(a); + uint32_t unused = block_len(a); + KRML_MAYBE_UNUSED_VAR(unused); absorb_inner_32(b_, s); } } @@ -2166,7 +2167,7 @@ void Hacl_Hash_SHA3_state_free(uint64_t *s) Absorb number of input blocks and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses an input of multiple of 168-bytes (SHAKE128 block size), + It processes an input of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block are ignored. The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] @@ -2191,14 +2192,14 @@ Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t Absorb a final partial block of input and write the output state This function is intended to receive a hash state and input buffer. - It prcoesses a sequence of bytes at end of input buffer that is less + It processes a sequence of bytes at end of input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffer are ignored. 
The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] The argument `input` (IN) points to `inputByteLen` bytes of valid memory, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffer must be passed to `inputByteLen` including the number of full-block bytes at start of input buffer that are ignored */ diff --git a/src/msvc/Hacl_Hash_SHA3_Simd256.c b/src/msvc/Hacl_Hash_SHA3_Simd256.c index 131c34e6..e0bb7e0b 100644 --- a/src/msvc/Hacl_Hash_SHA3_Simd256.c +++ b/src/msvc/Hacl_Hash_SHA3_Simd256.c @@ -5992,12 +5992,12 @@ void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) Absorb number of blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses an inputs of multiple of 168-bytes (SHAKE128 block size), + It processes an inputs of multiple of 168-bytes (SHAKE128 block size), any additional bytes of final partial block for each buffer are ignored. The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void @@ -6038,15 +6038,15 @@ Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( Absorb a final partial blocks of 4 input buffers and write the output states This function is intended to receive a quadruple hash state and 4 input buffers. - It prcoesses a sequence of bytes at end of each input buffer that is less + It processes a sequence of bytes at end of each input buffer that is less than 168-bytes (SHAKE128 block size), any bytes of full blocks at start of input buffers are ignored. 
The argument `state` (IN/OUT) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes + The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] - + Note: Full size of input buffers must be passed to `inputByteLen` including the number of full-block bytes at start of each input buffer that are ignored */ @@ -6378,7 +6378,7 @@ Squeeze a quadruple hash state to 4 output buffers The argument `state` (IN) points to quadruple hash state, i.e., Lib_IntVector_Intrinsics_vec256[25] - The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes + The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes of valid memory for each buffer, i.e., uint8_t[inputByteLen] */ void diff --git a/src/msvc/Hacl_K256_ECDSA.c b/src/msvc/Hacl_K256_ECDSA.c index 0aaab085..6f7bb632 100644 --- a/src/msvc/Hacl_K256_ECDSA.c +++ b/src/msvc/Hacl_K256_ECDSA.c @@ -351,7 +351,7 @@ static inline uint64_t load_qelem_check(uint64_t *f, uint8_t *b) 1U, uint64_t beq = FStar_UInt64_eq_mask(f[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(f[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL)));); + acc = (beq & acc) | (~beq & blt);); uint64_t is_lt_q = acc; return ~is_zero & is_lt_q; } @@ -372,11 +372,7 @@ static inline bool load_qelem_vartime(uint64_t *f, uint8_t *b) uint64_t a2 = f[2U]; uint64_t a3 = f[3U]; bool is_lt_q_b; - if (a3 < 0xffffffffffffffffULL) - { - is_lt_q_b = true; - } - else if (a2 < 0xfffffffffffffffeULL) + if (a3 < 0xffffffffffffffffULL || a2 < 0xfffffffffffffffeULL) { is_lt_q_b = true; } @@ -510,12 +506,14 @@ static inline void modq(uint64_t *out, uint64_t *a) uint64_t *t01 = tmp; uint64_t m[7U] = { 0U }; uint64_t p[5U] = { 0U }; - mul_pow2_256_minus_q_add(4U, 7U, t01, a + 4U, a, m); - mul_pow2_256_minus_q_add(3U, 5U, 
t01, m + 4U, m, p); + uint64_t c0 = mul_pow2_256_minus_q_add(4U, 7U, t01, a + 4U, a, m); + KRML_MAYBE_UNUSED_VAR(c0); + uint64_t c10 = mul_pow2_256_minus_q_add(3U, 5U, t01, m + 4U, m, p); + KRML_MAYBE_UNUSED_VAR(c10); uint64_t c2 = mul_pow2_256_minus_q_add(1U, 4U, t01, p + 4U, p, r); - uint64_t c0 = c2; + uint64_t c00 = c2; uint64_t c1 = add4(r, tmp, out); - uint64_t mask = 0ULL - (c0 + c1); + uint64_t mask = 0ULL - (c00 + c1); KRML_MAYBE_FOR4(i, 0U, 4U, @@ -567,11 +565,7 @@ static inline bool is_qelem_le_q_halved_vartime(uint64_t *f) { return false; } - if (a2 < 0xffffffffffffffffULL) - { - return true; - } - if (a1 < 0x5d576e7357a4501dULL) + if (a2 < 0xffffffffffffffffULL || a1 < 0x5d576e7357a4501dULL) { return true; } diff --git a/src/msvc/Hacl_RSAPSS.c b/src/msvc/Hacl_RSAPSS.c index cd19195d..7b004455 100644 --- a/src/msvc/Hacl_RSAPSS.c +++ b/src/msvc/Hacl_RSAPSS.c @@ -167,7 +167,7 @@ static inline uint64_t check_num_bits_u64(uint32_t bs, uint64_t *b) { uint64_t beq = FStar_UInt64_eq_mask(b[i], b2[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b[i], b2[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; return res; @@ -189,7 +189,7 @@ static inline uint64_t check_modulus_u64(uint32_t modBits, uint64_t *n) { uint64_t beq = FStar_UInt64_eq_mask(b2[i], n[i]); uint64_t blt = ~FStar_UInt64_gte_mask(b2[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t res = acc; uint64_t m1 = res; @@ -288,11 +288,7 @@ pss_verify( em_0 = 0U; } uint8_t em_last = em[emLen - 1U]; - if (emLen < saltLen + hash_len(a) + 2U) - { - return false; - } - if (!(em_last == 0xbcU && em_0 == 0U)) + if (emLen < saltLen + hash_len(a) + 2U || !(em_last == 0xbcU && em_0 == 0U)) { return false; } @@ -553,7 +549,7 @@ Hacl_RSAPSS_rsapss_verify( { uint64_t beq = FStar_UInt64_eq_mask(s[i], n[i]); uint64_t blt = 
~FStar_UInt64_gte_mask(s[i], n[i]); - acc = (beq & acc) | (~beq & ((blt & 0xFFFFFFFFFFFFFFFFULL) | (~blt & 0ULL))); + acc = (beq & acc) | (~beq & blt); } uint64_t mask = acc; bool res; @@ -568,10 +564,9 @@ Hacl_RSAPSS_rsapss_verify( eBits, e, m); - bool ite; if (!((modBits - 1U) % 8U == 0U)) { - ite = true; + res = true; } else { @@ -579,15 +574,7 @@ Hacl_RSAPSS_rsapss_verify( uint32_t j = (modBits - 1U) % 64U; uint64_t tmp = m[i]; uint64_t get_bit = tmp >> j & 1ULL; - ite = get_bit == 0ULL; - } - if (ite) - { - res = true; - } - else - { - res = false; + res = get_bit == 0ULL; } } else diff --git a/src/msvc/Lib_Memzero0.c b/src/msvc/Lib_Memzero0.c index 3d8a1e5f..5c269d23 100644 --- a/src/msvc/Lib_Memzero0.c +++ b/src/msvc/Lib_Memzero0.c @@ -13,7 +13,7 @@ #include #endif -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined(__NetBSD__) #include #endif @@ -36,7 +36,7 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len) { size_t len_ = (size_t) len; #ifdef _WIN32 - SecureZeroMemory(dst, len); + SecureZeroMemory(dst, len_); #elif defined(__APPLE__) && defined(__MACH__) memset_s(dst, len_, 0, len_); #elif (defined(__linux__) && !defined(LINUX_NO_EXPLICIT_BZERO)) || defined(__FreeBSD__) diff --git a/src/wasm/EverCrypt_Hash.wasm b/src/wasm/EverCrypt_Hash.wasm index 1447feb3..c6ecc136 100644 Binary files a/src/wasm/EverCrypt_Hash.wasm and b/src/wasm/EverCrypt_Hash.wasm differ diff --git a/src/wasm/Hacl_Bignum.wasm b/src/wasm/Hacl_Bignum.wasm index 6e090b50..0ad9988a 100644 Binary files a/src/wasm/Hacl_Bignum.wasm and b/src/wasm/Hacl_Bignum.wasm differ diff --git a/src/wasm/Hacl_Bignum256.wasm b/src/wasm/Hacl_Bignum256.wasm index b28b276b..ed099987 100644 Binary files a/src/wasm/Hacl_Bignum256.wasm and b/src/wasm/Hacl_Bignum256.wasm differ diff --git a/src/wasm/Hacl_Bignum256_32.wasm b/src/wasm/Hacl_Bignum256_32.wasm index 5fcc70ae..31c7d5a7 100644 Binary files a/src/wasm/Hacl_Bignum256_32.wasm and b/src/wasm/Hacl_Bignum256_32.wasm differ diff --git 
a/src/wasm/Hacl_Bignum32.wasm b/src/wasm/Hacl_Bignum32.wasm index c2102b81..b34c2c74 100644 Binary files a/src/wasm/Hacl_Bignum32.wasm and b/src/wasm/Hacl_Bignum32.wasm differ diff --git a/src/wasm/Hacl_Bignum4096.wasm b/src/wasm/Hacl_Bignum4096.wasm index 6cc1bf47..1088ab54 100644 Binary files a/src/wasm/Hacl_Bignum4096.wasm and b/src/wasm/Hacl_Bignum4096.wasm differ diff --git a/src/wasm/Hacl_Bignum4096_32.wasm b/src/wasm/Hacl_Bignum4096_32.wasm index 35bcb037..6518eb4f 100644 Binary files a/src/wasm/Hacl_Bignum4096_32.wasm and b/src/wasm/Hacl_Bignum4096_32.wasm differ diff --git a/src/wasm/Hacl_Bignum64.wasm b/src/wasm/Hacl_Bignum64.wasm index d7db1531..221c3e39 100644 Binary files a/src/wasm/Hacl_Bignum64.wasm and b/src/wasm/Hacl_Bignum64.wasm differ diff --git a/src/wasm/Hacl_Bignum_Base.wasm b/src/wasm/Hacl_Bignum_Base.wasm index e407cd78..9e75139e 100644 Binary files a/src/wasm/Hacl_Bignum_Base.wasm and b/src/wasm/Hacl_Bignum_Base.wasm differ diff --git a/src/wasm/Hacl_Ed25519.wasm b/src/wasm/Hacl_Ed25519.wasm index 5fa25fad..cdd77fd6 100644 Binary files a/src/wasm/Hacl_Ed25519.wasm and b/src/wasm/Hacl_Ed25519.wasm differ diff --git a/src/wasm/Hacl_GenericField32.wasm b/src/wasm/Hacl_GenericField32.wasm index 52efafdf..12c62b40 100644 Binary files a/src/wasm/Hacl_GenericField32.wasm and b/src/wasm/Hacl_GenericField32.wasm differ diff --git a/src/wasm/Hacl_GenericField64.wasm b/src/wasm/Hacl_GenericField64.wasm index a475b2db..fa41a05b 100644 Binary files a/src/wasm/Hacl_GenericField64.wasm and b/src/wasm/Hacl_GenericField64.wasm differ diff --git a/src/wasm/Hacl_HKDF_Blake2s_128.wasm b/src/wasm/Hacl_HKDF_Blake2s_128.wasm index 03362c9f..d3975181 100644 Binary files a/src/wasm/Hacl_HKDF_Blake2s_128.wasm and b/src/wasm/Hacl_HKDF_Blake2s_128.wasm differ diff --git a/src/wasm/Hacl_HMAC.wasm b/src/wasm/Hacl_HMAC.wasm index 8752dda8..9f478611 100644 Binary files a/src/wasm/Hacl_HMAC.wasm and b/src/wasm/Hacl_HMAC.wasm differ diff --git 
a/src/wasm/Hacl_HMAC_Blake2b_256.wasm b/src/wasm/Hacl_HMAC_Blake2b_256.wasm index 9ee78af8..38740139 100644 Binary files a/src/wasm/Hacl_HMAC_Blake2b_256.wasm and b/src/wasm/Hacl_HMAC_Blake2b_256.wasm differ diff --git a/src/wasm/Hacl_HMAC_Blake2s_128.wasm b/src/wasm/Hacl_HMAC_Blake2s_128.wasm index 22fce826..f9259a47 100644 Binary files a/src/wasm/Hacl_HMAC_Blake2s_128.wasm and b/src/wasm/Hacl_HMAC_Blake2s_128.wasm differ diff --git a/src/wasm/Hacl_HMAC_DRBG.wasm b/src/wasm/Hacl_HMAC_DRBG.wasm index f536237d..aac0b09e 100644 Binary files a/src/wasm/Hacl_HMAC_DRBG.wasm and b/src/wasm/Hacl_HMAC_DRBG.wasm differ diff --git a/src/wasm/Hacl_HPKE_Curve51_CP32_SHA256.wasm b/src/wasm/Hacl_HPKE_Curve51_CP32_SHA256.wasm index 37798d12..655c8058 100644 Binary files a/src/wasm/Hacl_HPKE_Curve51_CP32_SHA256.wasm and b/src/wasm/Hacl_HPKE_Curve51_CP32_SHA256.wasm differ diff --git a/src/wasm/Hacl_HPKE_Curve51_CP32_SHA512.wasm b/src/wasm/Hacl_HPKE_Curve51_CP32_SHA512.wasm index 2b7c2496..a4c5a62d 100644 Binary files a/src/wasm/Hacl_HPKE_Curve51_CP32_SHA512.wasm and b/src/wasm/Hacl_HPKE_Curve51_CP32_SHA512.wasm differ diff --git a/src/wasm/Hacl_Hash_Base.wasm b/src/wasm/Hacl_Hash_Base.wasm index 3b04e240..d32d9a3e 100644 Binary files a/src/wasm/Hacl_Hash_Base.wasm and b/src/wasm/Hacl_Hash_Base.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2b.wasm b/src/wasm/Hacl_Hash_Blake2b.wasm index 29138d3d..6faec1bb 100644 Binary files a/src/wasm/Hacl_Hash_Blake2b.wasm and b/src/wasm/Hacl_Hash_Blake2b.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm b/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm index 1e2c80b7..ad574d25 100644 Binary files a/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm and b/src/wasm/Hacl_Hash_Blake2b_Simd256.wasm differ diff --git a/src/wasm/Hacl_Hash_Blake2s.wasm b/src/wasm/Hacl_Hash_Blake2s.wasm index 8e69e8f7..cb12f6c1 100644 Binary files a/src/wasm/Hacl_Hash_Blake2s.wasm and b/src/wasm/Hacl_Hash_Blake2s.wasm differ diff --git 
a/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm b/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm index b1a26f75..dc99dd8a 100644 Binary files a/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm and b/src/wasm/Hacl_Hash_Blake2s_Simd128.wasm differ diff --git a/src/wasm/Hacl_Hash_MD5.wasm b/src/wasm/Hacl_Hash_MD5.wasm index 0efb5420..b6fa9371 100644 Binary files a/src/wasm/Hacl_Hash_MD5.wasm and b/src/wasm/Hacl_Hash_MD5.wasm differ diff --git a/src/wasm/Hacl_Hash_SHA1.wasm b/src/wasm/Hacl_Hash_SHA1.wasm index 35f56707..56378ff8 100644 Binary files a/src/wasm/Hacl_Hash_SHA1.wasm and b/src/wasm/Hacl_Hash_SHA1.wasm differ diff --git a/src/wasm/Hacl_Hash_SHA2.wasm b/src/wasm/Hacl_Hash_SHA2.wasm index 09296bcc..31d08a71 100644 Binary files a/src/wasm/Hacl_Hash_SHA2.wasm and b/src/wasm/Hacl_Hash_SHA2.wasm differ diff --git a/src/wasm/Hacl_Hash_SHA3.wasm b/src/wasm/Hacl_Hash_SHA3.wasm index 8104d0a6..faa851fc 100644 Binary files a/src/wasm/Hacl_Hash_SHA3.wasm and b/src/wasm/Hacl_Hash_SHA3.wasm differ diff --git a/src/wasm/Hacl_IntTypes_Intrinsics.wasm b/src/wasm/Hacl_IntTypes_Intrinsics.wasm index d63ddd1a..b5cece2c 100644 Binary files a/src/wasm/Hacl_IntTypes_Intrinsics.wasm and b/src/wasm/Hacl_IntTypes_Intrinsics.wasm differ diff --git a/src/wasm/Hacl_K256_ECDSA.wasm b/src/wasm/Hacl_K256_ECDSA.wasm index 5022a27e..a68d42d9 100644 Binary files a/src/wasm/Hacl_K256_ECDSA.wasm and b/src/wasm/Hacl_K256_ECDSA.wasm differ diff --git a/src/wasm/Hacl_MAC_Poly1305.wasm b/src/wasm/Hacl_MAC_Poly1305.wasm index c4e38920..60eac1db 100644 Binary files a/src/wasm/Hacl_MAC_Poly1305.wasm and b/src/wasm/Hacl_MAC_Poly1305.wasm differ diff --git a/src/wasm/Hacl_P256.wasm b/src/wasm/Hacl_P256.wasm index 017ee9e8..05109e51 100644 Binary files a/src/wasm/Hacl_P256.wasm and b/src/wasm/Hacl_P256.wasm differ diff --git a/src/wasm/INFO.txt b/src/wasm/INFO.txt index e7adb2e4..6cb6c87b 100644 --- a/src/wasm/INFO.txt +++ b/src/wasm/INFO.txt @@ -1,4 +1,4 @@ This code was generated with the following toolchain. 
-F* version: 96f90842af8c0137bdee87ccb7bd3ea92485efb6 -Karamel version: 1282f04f16a4e193f329708b22e0a4577d5dd092 +F* version: 4eb223505209b81c65341e9929eae4ea4837766b +Karamel version: 5f7e9be838ba8ec0ed3323132e6beb6276f6acf2 Vale version: 0.3.19 diff --git a/src/wasm/WasmSupport.wasm b/src/wasm/WasmSupport.wasm index 00717e5b..794daf78 100644 Binary files a/src/wasm/WasmSupport.wasm and b/src/wasm/WasmSupport.wasm differ diff --git a/src/wasm/layouts.json b/src/wasm/layouts.json index c7e414d8..d5f5b2dc 100644 --- a/src/wasm/layouts.json +++ b/src/wasm/layouts.json @@ -1 +1 @@ -{"Spec_Hash_Definitions_hash_alg":["LEnum"],"Prims_string":["LBuiltin",["I32"],["A32"]],"Prims_int":["LBuiltin",["I32"],["A32"]],"K___uint32_t_uint32_t":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Int",["A32"]]]]]}],"__bool_bool_bool_bool":["LFlat",{"size":4,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[3,["Int",["A8"]]]]]}],"__bool_bool":["LFlat",{"size":2,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]]]}],"K____uint64_t___uint64_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A64"]]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"K____uint32_t___uint32_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A32"]]]]],["snd",[4,["Pointer",["Int",["A32"]]]]]]}],"K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"K____Hacl_Impl_Blake2_Core_blake2_params___uint8_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Layout","Hacl_Hash_Blake2b_blake2_params"]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Streaming_Types_error_code":["LEnum"],"Hacl_MAC_Poly1305_state_t":["LFlat"
,{"size":20,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]],["p_key",[16,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Streaming_MD_state_64":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Streaming_MD_state_32":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A32"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Hash_SHA3_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"hash_buf2":["LFlat",{"size":16,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["snd",[8,["Layout","Hacl_Hash_SHA3_hash_buf"]]]]}],"Hacl_Hash_SHA3_hash_buf":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Hash_Blake2s_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____uint32_t___uint32_t_"]]]]}],"Hacl_Hash_Blake2s_Simd128_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_Simd128_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_Simd128_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_"]]]]}],"Hacl_Hash_Blake2b_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_l
en",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____uint64_t___uint64_t_"]]]]}],"Hacl_Hash_Blake2b_Simd256_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_Simd256_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_Simd256_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[8,["Layout","K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_"]]]]}],"Hacl_Hash_Blake2b_index":["LFlat",{"size":2,"fields":[["key_length",[0,["Int",["A8"]]]],["digest_length",[1,["Int",["A8"]]]]]}],"Hacl_Hash_SHA2_uint8_8p":["LFlat",{"size":56,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_7p"]]]]}],"Hacl_Hash_SHA2_uint8_7p":["LFlat",{"size":48,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_6p"]]]]}],"Hacl_Hash_SHA2_uint8_6p":["LFlat",{"size":40,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_5p"]]]]}],"Hacl_Hash_SHA2_uint8_5p":["LFlat",{"size":32,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_4p":["LFlat",{"size":24,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_3p"]]]]}],"Hacl_Hash_SHA2_uint8_3p":["LFlat",{"size":16,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_2p"]]]]}],"Hacl_Hash_SHA2_uint8_2x8p":["LFlat",{"size":112,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_8p"]]],["snd",[56,["Layout","Hacl_Hash_SHA2_uint8_8p"]]]]}],"Hacl_Hash_SHA2_uint8_2x4p":["LFlat",{"size":48,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_4p"]]],["snd",[24,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_2p
":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Impl_HPKE_context_s":["LFlat",{"size":16,"fields":[["ctx_key",[0,["Pointer",["Int",["A8"]]]]],["ctx_nonce",[4,["Pointer",["Int",["A8"]]]]],["ctx_seq",[8,["Pointer",["Int",["A64"]]]]],["ctx_exporter",[12,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Hash_Blake2b_blake2_params":["LFlat",{"size":28,"fields":[["digest_length",[0,["Int",["A8"]]]],["key_length",[1,["Int",["A8"]]]],["fanout",[2,["Int",["A8"]]]],["depth",[3,["Int",["A8"]]]],["leaf_length",[4,["Int",["A32"]]]],["node_offset",[8,["Int",["A64"]]]],["node_depth",[16,["Int",["A8"]]]],["inner_length",[17,["Int",["A8"]]]],["salt",[20,["Pointer",["Int",["A8"]]]]],["personal",[24,["Pointer",["Int",["A8"]]]]]]}],"Hacl_HMAC_DRBG_state":["LFlat",{"size":12,"fields":[["k",[0,["Pointer",["Int",["A8"]]]]],["v",[4,["Pointer",["Int",["A8"]]]]],["reseed_counter",[8,["Pointer",["Int",["A32"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64":["LFlat",{"size":20,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A64"]]]]],["mu",[8,["Int",["A64"]]]],["r2",[16,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32":["LFlat",{"size":16,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A32"]]]]],["mu",[8,["Int",["A32"]]]],["r2",[12,["Pointer",["Int",["A32"]]]]]]}],"FStar_UInt128_uint128":["LFlat",{"size":16,"fields":[["low",[0,["Int",["A64"]]]],["high",[8,["Int",["A64"]]]]]}],"EverCrypt_Hash_Incremental_state_t":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Layout","EverCrypt_Hash_state_s"]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"state_s_tags":["LEnum"],"EverCrypt_Hash_state_s":["LFlat",{"size":12,"fields":[["tag",[0,["Int",["A32"]]]],["val",[8,["Union",[["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"
]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A32"]]],["Pointer",["Unknown"]],["Pointer",["Int",["A64"]]],["Pointer",["Unknown"]]]]]]]}],"EverCrypt_Error_error_code":["LEnum"],"C_String_t_":["LBuiltin",["I32"],["A32"]],"C_String_t":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t_":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t":["LBuiltin",["I32"],["A32"]],"exit_code":["LBuiltin",["I32"],["A32"]],"clock_t":["LBuiltin",["I32"],["A32"]]} \ No newline at end of file +{"Spec_Hash_Definitions_hash_alg":["LEnum"],"Prims_string":["LBuiltin",["I32"],["A32"]],"Prims_int":["LBuiltin",["I32"],["A32"]],"K___uint32_t_uint32_t":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Int",["A32"]]]]]}],"__bool_bool_bool_bool":["LFlat",{"size":4,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[3,["Int",["A8"]]]]]}],"__bool_bool":["LFlat",{"size":2,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]]]}],"K____uint64_t___uint64_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A64"]]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"K____uint32_t___uint32_t_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A32"]]]]],["snd",[4,["Pointer",["Int",["A32"]]]]]]}],"K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Unknown"]]]],["snd",[4,["Pointer",["Unknown"]]]]]}],"Hacl_Streaming_Types_error_code":["LEnum"],"Hacl_MAC_Poly1305_state_t":["LFlat",{"size":20,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]],["p_key",[16,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Streaming_MD_state_64":["L
Flat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A64"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Streaming_MD_state_32":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Int",["A32"]]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"Hacl_Hash_SHA3_state_t":["LFlat",{"size":24,"fields":[["block_state",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["buf",[8,["Pointer",["Int",["A8"]]]]],["total_len",[16,["Int",["A64"]]]]]}],"hash_buf2":["LFlat",{"size":16,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA3_hash_buf"]]],["snd",[8,["Layout","Hacl_Hash_SHA3_hash_buf"]]]]}],"Hacl_Hash_SHA3_hash_buf":["LFlat",{"size":8,"fields":[["fst",[0,["Int",["A32"]]]],["snd",[4,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Hash_Blake2s_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[8,["Layout","K____uint32_t___uint32_t_"]]]]}],"Hacl_Hash_Blake2s_Simd128_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2s_Simd128_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2s_Simd128_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[8,["Layout","K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_"]]]]}],"Hacl_Hash_Blake2b_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8
"]]]],["f3",[8,["Layout","K____uint64_t___uint64_t_"]]]]}],"Hacl_Hash_Blake2b_Simd256_state_t":["LFlat",{"size":32,"fields":[["block_state",[0,["Layout","Hacl_Hash_Blake2b_Simd256_block_state_t"]]],["buf",[16,["Pointer",["Int",["A8"]]]]],["total_len",[24,["Int",["A64"]]]]]}],"Hacl_Hash_Blake2b_Simd256_block_state_t":["LFlat",{"size":16,"fields":[["fst",[0,["Int",["A8"]]]],["snd",[1,["Int",["A8"]]]],["thd",[2,["Int",["A8"]]]],["f3",[8,["Layout","K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_"]]]]}],"Hacl_Hash_Blake2b_index":["LFlat",{"size":3,"fields":[["key_length",[0,["Int",["A8"]]]],["digest_length",[1,["Int",["A8"]]]],["last_node",[2,["Int",["A8"]]]]]}],"Hacl_Hash_Blake2b_params_and_key":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Layout","Hacl_Hash_Blake2b_blake2_params"]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Hash_SHA2_uint8_8p":["LFlat",{"size":56,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_7p"]]]]}],"Hacl_Hash_SHA2_uint8_7p":["LFlat",{"size":48,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_6p"]]]]}],"Hacl_Hash_SHA2_uint8_6p":["LFlat",{"size":40,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_5p"]]]]}],"Hacl_Hash_SHA2_uint8_5p":["LFlat",{"size":32,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_4p":["LFlat",{"size":24,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_3p"]]]]}],"Hacl_Hash_SHA2_uint8_3p":["LFlat",{"size":16,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[8,["Layout","Hacl_Hash_SHA2_uint8_2p"]]]]}],"Hacl_Hash_SHA2_uint8_2x8p":["LFlat",{"size":112,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA2_uint8_8p"]]],["snd",[56,["Layout","Hacl_Hash_SHA2_uint8_8p"]]]]}],"Hacl_Hash_SHA2_uint8_2x4p":["LFlat",{"size":48,"fields":[["fst",[0,["Layout","Hacl_Hash_SHA
2_uint8_4p"]]],["snd",[24,["Layout","Hacl_Hash_SHA2_uint8_4p"]]]]}],"Hacl_Hash_SHA2_uint8_2p":["LFlat",{"size":8,"fields":[["fst",[0,["Pointer",["Int",["A8"]]]]],["snd",[4,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Impl_HPKE_context_s":["LFlat",{"size":16,"fields":[["ctx_key",[0,["Pointer",["Int",["A8"]]]]],["ctx_nonce",[4,["Pointer",["Int",["A8"]]]]],["ctx_seq",[8,["Pointer",["Int",["A64"]]]]],["ctx_exporter",[12,["Pointer",["Int",["A8"]]]]]]}],"Hacl_Hash_Blake2b_blake2_params":["LFlat",{"size":28,"fields":[["digest_length",[0,["Int",["A8"]]]],["key_length",[1,["Int",["A8"]]]],["fanout",[2,["Int",["A8"]]]],["depth",[3,["Int",["A8"]]]],["leaf_length",[4,["Int",["A32"]]]],["node_offset",[8,["Int",["A64"]]]],["node_depth",[16,["Int",["A8"]]]],["inner_length",[17,["Int",["A8"]]]],["salt",[20,["Pointer",["Int",["A8"]]]]],["personal",[24,["Pointer",["Int",["A8"]]]]]]}],"Hacl_HMAC_DRBG_state":["LFlat",{"size":12,"fields":[["k",[0,["Pointer",["Int",["A8"]]]]],["v",[4,["Pointer",["Int",["A8"]]]]],["reseed_counter",[8,["Pointer",["Int",["A32"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u64":["LFlat",{"size":20,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A64"]]]]],["mu",[8,["Int",["A64"]]]],["r2",[16,["Pointer",["Int",["A64"]]]]]]}],"Hacl_Bignum_MontArithmetic_bn_mont_ctx_u32":["LFlat",{"size":16,"fields":[["len",[0,["Int",["A32"]]]],["n",[4,["Pointer",["Int",["A32"]]]]],["mu",[8,["Int",["A32"]]]],["r2",[12,["Pointer",["Int",["A32"]]]]]]}],"FStar_UInt128_uint128":["LFlat",{"size":16,"fields":[["low",[0,["Int",["A64"]]]],["high",[8,["Int",["A64"]]]]]}],"EverCrypt_Hash_Incremental_state_t":["LFlat",{"size":16,"fields":[["block_state",[0,["Pointer",["Layout","EverCrypt_Hash_state_s"]]]],["buf",[4,["Pointer",["Int",["A8"]]]]],["total_len",[8,["Int",["A64"]]]]]}],"state_s_tags":["LEnum"],"EverCrypt_Hash_state_s":["LFlat",{"size":12,"fields":[["tag",[0,["Int",["A32"]]]],["val",[8,["Union",[["Pointer",["Int",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int
",["A32"]]],["Pointer",["Int",["A32"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A64"]]],["Pointer",["Int",["A32"]]],["Pointer",["Unknown"]],["Pointer",["Int",["A64"]]],["Pointer",["Unknown"]]]]]]]}],"EverCrypt_Error_error_code":["LEnum"],"C_String_t_":["LBuiltin",["I32"],["A32"]],"C_String_t":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t_":["LBuiltin",["I32"],["A32"]],"C_Compat_String_t":["LBuiltin",["I32"],["A32"]],"exit_code":["LBuiltin",["I32"],["A32"]],"clock_t":["LBuiltin",["I32"],["A32"]]} \ No newline at end of file diff --git a/src/wasm/main.html b/src/wasm/main.html index 8e3823bf..607c7d54 100644 --- a/src/wasm/main.html +++ b/src/wasm/main.html @@ -8,7 +8,7 @@ diff --git a/src/wasm/shell.js b/src/wasm/shell.js index 28a02c3e..462cd120 100644 --- a/src/wasm/shell.js +++ b/src/wasm/shell.js @@ -1,7 +1,7 @@ // To be loaded by main.js var my_js_files = ["./test.js"]; -var my_modules = ["WasmSupport", "FStar", "LowStar_Endianness", "Hacl_Impl_Blake2_Constants", "Hacl_Lib", "Hacl_Hash_Blake2b", "Hacl_Hash_Blake2s", "Hacl_Hash_Blake2b_Simd256", "Hacl_Hash_Blake2s_Simd128", "Hacl_Hash_Base", "Hacl_Hash_SHA1", "Hacl_Hash_SHA2", "Hacl_HMAC", "Hacl_HMAC_Blake2s_128", "Hacl_HMAC_Blake2b_256", "Hacl_Hash_SHA3", "Hacl_Hash_SHA3_Simd256", "Hacl_Hash_MD5", "EverCrypt_TargetConfig", "EverCrypt", "Vale", "EverCrypt_Hash", "Hacl_Chacha20", "Hacl_Chacha20_Vec128_Hacl_Chacha20_Vec256", "Hacl_Salsa20", "Hacl_IntTypes_Intrinsics", "Hacl_Bignum_Base", "Hacl_Bignum", "Hacl_Bignum25519_51", "Hacl_Curve25519_51", "Hacl_MAC_Poly1305", "Hacl_AEAD_Chacha20Poly1305", "Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305", "Hacl_AEAD_Chacha20Poly1305_Simd128", "Hacl_AEAD_Chacha20Poly1305_Simd256", "Hacl_Ed25519_PrecompTable", "Hacl_Ed25519", "Hacl_NaCl", "Hacl_P256_PrecompTable", "Hacl_P256", "Hacl_Bignum_K256", "Hacl_K256_PrecompTable", "Hacl_K256_ECDSA", 
"Hacl_HKDF", "Hacl_HPKE_Curve51_CP32_SHA256", "Hacl_HPKE_Curve51_CP32_SHA512", "Hacl_GenericField32", "Hacl_SHA2_Vec256", "Hacl_EC_K256", "Hacl_Bignum4096", "Hacl_Chacha20_Vec32", "Hacl_Bignum4096_32", "Hacl_HKDF_Blake2s_128", "Hacl_GenericField64", "Hacl_Bignum32", "Hacl_Bignum256_32", "Hacl_SHA2_Vec128", "Hacl_HMAC_DRBG", "Hacl_Bignum64", "Hacl_HKDF_Blake2b_256", "Hacl_EC_Ed25519", "Hacl_Bignum256"]; +var my_modules = ["WasmSupport", "FStar", "LowStar_Endianness", "Hacl_Impl_Blake2_Constants", "Hacl_Lib", "Hacl_Hash_Blake2b", "Hacl_Hash_Blake2s", "Hacl_Hash_Blake2b_Simd256", "Hacl_Hash_Blake2s_Simd128", "Hacl_Hash_Base", "Hacl_Hash_MD5", "Hacl_Hash_SHA1", "Hacl_Hash_SHA3", "Hacl_Hash_SHA2", "Hacl_HMAC", "Hacl_HMAC_Blake2s_128", "Hacl_HMAC_Blake2b_256", "Hacl_Hash_SHA3_Simd256", "EverCrypt_TargetConfig", "EverCrypt", "Vale", "EverCrypt_Hash", "Hacl_Chacha20", "Hacl_Chacha20_Vec128_Hacl_Chacha20_Vec256", "Hacl_Salsa20", "Hacl_IntTypes_Intrinsics", "Hacl_Bignum_Base", "Hacl_Bignum", "Hacl_Bignum25519_51", "Hacl_Curve25519_51", "Hacl_MAC_Poly1305", "Hacl_AEAD_Chacha20Poly1305", "Hacl_Poly1305_128_Hacl_Poly1305_256_Hacl_Impl_Poly1305", "Hacl_AEAD_Chacha20Poly1305_Simd128", "Hacl_AEAD_Chacha20Poly1305_Simd256", "Hacl_Ed25519_PrecompTable", "Hacl_Ed25519", "Hacl_NaCl", "Hacl_P256_PrecompTable", "Hacl_P256", "Hacl_Bignum_K256", "Hacl_K256_PrecompTable", "Hacl_K256_ECDSA", "Hacl_HKDF", "Hacl_HPKE_Curve51_CP32_SHA256", "Hacl_HPKE_Curve51_CP32_SHA512", "Hacl_GenericField32", "Hacl_SHA2_Vec256", "Hacl_EC_K256", "Hacl_Bignum4096", "Hacl_Chacha20_Vec32", "Hacl_Bignum4096_32", "Hacl_HKDF_Blake2s_128", "Hacl_GenericField64", "Hacl_Bignum32", "Hacl_Bignum256_32", "Hacl_SHA2_Vec128", "Hacl_HMAC_DRBG", "Hacl_Bignum64", "Hacl_HKDF_Blake2b_256", "Hacl_EC_Ed25519", "Hacl_Bignum256"]; var my_debug = false; if (typeof module !== "undefined")