Skip to content

Commit

Permalink
fix: improve parallelization
Browse files: browse the repository at this point in the history
  • Loading branch information
rymnc committed Dec 31, 2024
1 parent f280fb7 commit 41b7550
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions fuel-vm/src/interpreter/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,12 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
}

unsafe {
let mut aggregate_mask = -1i32;
let mut aggregate_mask_a = -1i32;
let mut aggregate_mask_b = -1i32;
let mut aggregate_mask_c = -1i32;
let mut aggregate_mask_d = -1i32;
let mut aggregate_mask_a_b = -1i32;
let mut aggregate_mask_c_d = -1i32;

while i + CHUNK <= len {
let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _);
Expand Down Expand Up @@ -1151,9 +1156,15 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
let cmp7 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(simd_a7, simd_b7));
let cmp8 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(simd_a8, simd_b8));

aggregate_mask &= cmp1 & cmp2 & cmp3 & cmp4 & cmp5 & cmp6 & cmp7 & cmp8;
aggregate_mask_a &= cmp1 & cmp2;
aggregate_mask_b &= cmp3 & cmp4;
aggregate_mask_c &= cmp5 & cmp6;
aggregate_mask_d &= cmp7 & cmp8;

if aggregate_mask != -1i32 {
aggregate_mask_a_b &= aggregate_mask_a & aggregate_mask_b;
aggregate_mask_c_d &= aggregate_mask_c & aggregate_mask_d;

if aggregate_mask_a_b & aggregate_mask_c_d != -1i32 {
return false;
}

Expand Down Expand Up @@ -1209,7 +1220,6 @@ fn slices_equal_avx512(a: &[u8], b: &[u8]) -> bool {
let simd_a8 = _mm512_loadu_si512(a.as_ptr().add(i + 448) as *const _);
let simd_b8 = _mm512_loadu_si512(b.as_ptr().add(i + 448) as *const _);

// Compare each pair of registers
let cmp1 = _mm512_cmpeq_epi8_mask(simd_a1, simd_b1);
let cmp2 = _mm512_cmpeq_epi8_mask(simd_a2, simd_b2);
let cmp3 = _mm512_cmpeq_epi8_mask(simd_a3, simd_b3);
Expand All @@ -1219,11 +1229,17 @@ fn slices_equal_avx512(a: &[u8], b: &[u8]) -> bool {
let cmp7 = _mm512_cmpeq_epi8_mask(simd_a7, simd_b7);
let cmp8 = _mm512_cmpeq_epi8_mask(simd_a8, simd_b8);

// Combine all comparison masks
let combined_cmp = cmp1 & cmp2 & cmp3 & cmp4 & cmp5 & cmp6 & cmp7 & cmp8;
let cmp1_2 = cmp1 & cmp2;
let cmp3_4 = cmp3 & cmp4;
let cmp5_6 = cmp5 & cmp6;
let cmp7_8 = cmp7 & cmp8;

let cmp1_4 = cmp1_2 & cmp3_4;
let cmp5_8 = cmp5_6 & cmp7_8;

let full_cmp = cmp1_4 & cmp5_8;

// Check if all bytes are equal (mask should have all bits set)
if combined_cmp != u64::MAX {
if full_cmp != u64::MAX {
return false;
}

Expand Down

0 comments on commit 41b7550

Please sign in to comment.