From 08a8d8ea0c2c517c70b6b1727afa9bdc96780dfd Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 10 Oct 2023 09:55:45 -0500
Subject: [PATCH 01/22] skip body extraction when the loop has 0 iterations

---
 circom/tests/subcmps/conv_map2idx_C.circom   | 55 ++++++++++++++++++++
 circuit_passes/src/passes/loop_unroll/mod.rs |  2 +-
 2 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 circom/tests/subcmps/conv_map2idx_C.circom

diff --git a/circom/tests/subcmps/conv_map2idx_C.circom b/circom/tests/subcmps/conv_map2idx_C.circom
new file mode 100644
index 000000000..e598c3981
--- /dev/null
+++ b/circom/tests/subcmps/conv_map2idx_C.circom
@@ -0,0 +1,55 @@
+pragma circom 2.0.0;
+
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template SegmentMulFix(nWindows) {
+    signal input e[nWindows];
+}
+
+template EscalarMulFix() {
+	//Needs at least 2 subcomp to trigger the crash
+    component segments[2];
+    for (var s = 0; s < 2; s++) {
+
+		// s = 0, nseg = 9, nWindows = 9
+		// s = 1, nseg = 4, nWindows = 6
+        var nseg = (s == 0) ? 9 : 4;
+        var nWindows = (s == 0) ? 9 : 6;
+
+        segments[s] = SegmentMulFix(nWindows);
+
+		// Needs this split loop to trigger the crash
+        for (var i = 0; i < nseg; i++) {
+			//Runs 9 times for s=0
+			//Runs 4 times for s=1
+            segments[s].e[i] <-- 999;
+        }
+        for (var i = nseg; i < nWindows; i++) {
+			//Runs 0 times for s=0		//this is the case where the extracted body is generated but shouldn't be!
+			//Runs 2 times for s=1
+            segments[s].e[i] <-- 888;
+        }
+    }
+}
+
+component main = EscalarMulFix();
+
+//CHECK-NOT: ..generated..loop.body.
+//
+//CHECK-LABEL: define void @EscalarMulFix_2_run([0 x i256]* %0)
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 999
+//CHECK: store i256 888
+//CHECK: store i256 888
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index 1fd37131a..59f193a67 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -64,7 +64,7 @@ impl<'d> LoopUnrollPass<'d> {
         }
 
         let mut block_body = vec![];
-        if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
+        if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() && recorder.get_iter() > 0 {
             // If the loop body contains more than one instruction, extract it into a new
             // function and generate 'recorder.get_iter()' number of calls to that function.
             // Otherwise, just duplicate the body 'recorder.get_iter()' number of times.

From 4b9feca5a67697e664a51d770d0a97a25b616ef1 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 24 Oct 2023 14:08:00 -0500
Subject: [PATCH 02/22] convert tab to space

---
 circom/tests/subcmps/conv_map2idx_C.circom | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/circom/tests/subcmps/conv_map2idx_C.circom b/circom/tests/subcmps/conv_map2idx_C.circom
index e598c3981..bf78759b6 100644
--- a/circom/tests/subcmps/conv_map2idx_C.circom
+++ b/circom/tests/subcmps/conv_map2idx_C.circom
@@ -8,26 +8,26 @@ template SegmentMulFix(nWindows) {
 }
 
 template EscalarMulFix() {
-	//Needs at least 2 subcomp to trigger the crash
+    //Needs at least 2 subcomp to trigger the crash
     component segments[2];
     for (var s = 0; s < 2; s++) {
 
-		// s = 0, nseg = 9, nWindows = 9
-		// s = 1, nseg = 4, nWindows = 6
+        // s = 0, nseg = 9, nWindows = 9
+        // s = 1, nseg = 4, nWindows = 6
         var nseg = (s == 0) ? 9 : 4;
         var nWindows = (s == 0) ? 9 : 6;
 
         segments[s] = SegmentMulFix(nWindows);
 
-		// Needs this split loop to trigger the crash
+        // Needs this split loop to trigger the crash
         for (var i = 0; i < nseg; i++) {
-			//Runs 9 times for s=0
-			//Runs 4 times for s=1
+            //Runs 9 times for s=0
+            //Runs 4 times for s=1
             segments[s].e[i] <-- 999;
         }
         for (var i = nseg; i < nWindows; i++) {
-			//Runs 0 times for s=0		//this is the case where the extracted body is generated but shouldn't be!
-			//Runs 2 times for s=1
+            //Runs 0 times for s=0      //this is the case where the extracted body is generated but shouldn't be!
+            //Runs 2 times for s=1
             segments[s].e[i] <-- 888;
         }
     }

From b8a70208c33974440a9e1de2fd52141727202925 Mon Sep 17 00:00:00 2001
From: Daniel Dominguez <daniel@veridise.com>
Date: Tue, 26 Sep 2023 21:39:47 +0200
Subject: [PATCH 03/22] Modify interpreter to distinguish between circom
 functions and loop body functions

---
 .../src/bucket_interpreter/env/extracted_func_env.rs      | 4 ++++
 circuit_passes/src/bucket_interpreter/env/mod.rs          | 8 ++++++++
 circuit_passes/src/bucket_interpreter/env/standard_env.rs | 4 ++++
 .../src/bucket_interpreter/env/unrolled_block_env.rs      | 4 ++++
 circuit_passes/src/bucket_interpreter/observer.rs         | 1 +
 circuit_passes/src/passes/conditional_flattening.rs       | 4 ++++
 .../src/passes/deterministic_subcomponent_invocation.rs   | 4 ++++
 .../src/passes/loop_unroll/extracted_location_updater.rs  | 2 +-
 .../src/passes/loop_unroll/loop_env_recorder.rs           | 4 ++++
 circuit_passes/src/passes/loop_unroll/mod.rs              | 4 ++++
 circuit_passes/src/passes/mapped_to_indexed.rs            | 4 ++++
 circuit_passes/src/passes/simplification.rs               | 4 ++++
 circuit_passes/src/passes/unknown_index_sanitization.rs   | 4 ++++
 13 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 57685e7e4..4e32a7760 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -47,6 +47,10 @@ impl<'a> ExtractedFuncEnvData<'a> {
         ExtractedFuncEnvData { base: Box::new(inner), remap }
     }
 
+    pub fn inside_loopbody_func_body(&self) -> bool {
+        true
+    }
+
     pub fn get_base(self) -> Env<'a> {
         *self.base
     }
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index 30e729888..72ff2fd12 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -100,6 +100,14 @@ impl LibraryAccess for Env<'_> {
 }
 
 impl<'a> Env<'a> {
+    pub fn inside_loopbody_func_body(&self) -> bool {
+        match self {
+            Env::Standard(e) => e.inside_loopbody_func_body(),
+            Env::UnrolledBlock(e) => e.inside_loopbody_func_body(),
+            Env::ExtractedFunction(e) => e.inside_loopbody_func_body()
+        }
+    }
+
     pub fn new_standard_env(libs: &'a dyn LibraryAccess) -> Self {
         Env::Standard(StandardEnvData::new(libs))
     }
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
index ab211aa02..ec2a07e3b 100644
--- a/circuit_passes/src/bucket_interpreter/env/standard_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -46,6 +46,10 @@ impl<'a> StandardEnvData<'a> {
     }
 
     // READ OPERATIONS
+    pub fn inside_loopbody_func_body(&self) -> bool {
+        false
+    }
+
     pub fn get_var(&self, idx: usize) -> Value {
         self.vars.get(&idx).unwrap_or_default().clone()
     }
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
index 23a8f4e09..411d2ec46 100644
--- a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -50,6 +50,10 @@ impl<'a> UnrolledBlockEnvData<'a> {
         UnrolledBlockEnvData { base: Box::new(base), extractor }
     }
 
+    pub fn inside_loopbody_func_body(&self) -> bool {
+        false
+    }
+
     pub fn get_var(&self, idx: usize) -> Value {
         self.base.get_var(idx)
     }
diff --git a/circuit_passes/src/bucket_interpreter/observer.rs b/circuit_passes/src/bucket_interpreter/observer.rs
index b6cc713d1..f233055f6 100644
--- a/circuit_passes/src/bucket_interpreter/observer.rs
+++ b/circuit_passes/src/bucket_interpreter/observer.rs
@@ -46,4 +46,5 @@ pub trait InterpreterObserver {
 
     fn ignore_function_calls(&self) -> bool;
     fn ignore_subcmp_calls(&self) -> bool;
+    fn ignore_loopbody_function_calls(&self) -> bool;
 }
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index a2b469cae..d696d1ffc 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -105,6 +105,10 @@ impl InterpreterObserver for ConditionalFlatteningPass<'_> {
     fn ignore_subcmp_calls(&self) -> bool {
         true
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        false
+    }
 }
 
 impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 1e9b9e5c6..1c16277f5 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -123,6 +123,10 @@ impl InterpreterObserver for DeterministicSubCmpInvokePass<'_> {
     fn ignore_subcmp_calls(&self) -> bool {
         true
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        false
+    }
 }
 
 impl CircuitTransformationPass for DeterministicSubCmpInvokePass<'_> {
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
index d20a83b9e..38c87bc93 100644
--- a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -77,7 +77,7 @@ impl ExtractedFunctionLocationUpdater {
                             //  Last here could result in the run function being called too soon.
                             //SEE: circom/tests/subcmps/subcmps0C.circom
                             input_information: InputInformation::Input {
-                                status: StatusInput::Last,
+                                status: StatusInput::Unknown, // We don't know but we need to make the subsequent passes fix this
                             },
                         },
                         dest: LocationRule::Indexed {
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 8c1a3cc9b..9781e9b48 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -284,4 +284,8 @@ impl InterpreterObserver for EnvRecorder<'_, '_> {
     fn ignore_subcmp_calls(&self) -> bool {
         true
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        true
+    }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index 59f193a67..e159dc098 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -183,6 +183,10 @@ impl InterpreterObserver for LoopUnrollPass<'_> {
     fn ignore_subcmp_calls(&self) -> bool {
         true
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        true
+    }
 }
 
 impl CircuitTransformationPass for LoopUnrollPass<'_> {
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 513fc9a1a..cdc9a2240 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -164,6 +164,10 @@ impl InterpreterObserver for MappedToIndexedPass<'_> {
     fn ignore_subcmp_calls(&self) -> bool {
         true
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        false
+    }
 }
 
 impl CircuitTransformationPass for MappedToIndexedPass<'_> {
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 81275e883..d5317ab21 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -116,6 +116,10 @@ impl InterpreterObserver for SimplificationPass<'_> {
     fn ignore_subcmp_calls(&self) -> bool {
         true
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        true
+    }
 }
 
 impl CircuitTransformationPass for SimplificationPass<'_> {
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index fa59e046b..229bea8a2 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -252,6 +252,10 @@ impl InterpreterObserver for UnknownIndexSanitizationPass<'_> {
     fn ignore_subcmp_calls(&self) -> bool {
         false
     }
+
+    fn ignore_loopbody_function_calls(&self) -> bool {
+        true // ?
+    }
 }
 
 fn do_array_union(a: &HashSet<Range<usize>>, b: &HashSet<Range<usize>>) -> HashSet<Range<usize>> {

From 0247fba50044e86b828785b1c90bcc654aa306da Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 28 Sep 2023 16:52:26 -0500
Subject: [PATCH 04/22] [VAN-676] duplicate extracted functions when flattening
 branch buckets

---
 circom/tests/loops/inner_conditional_1.circom |  98 +++++-----
 circom/tests/loops/inner_conditional_4.circom |  76 ++++----
 circom/tests/loops/inner_conditional_7.circom | 169 ++++++------------
 circom/tests/loops/inner_conditional_9.circom | 138 +++++++-------
 .../env/extracted_func_env.rs                 |  21 +--
 .../src/bucket_interpreter/env/mod.rs         |  17 +-
 .../bucket_interpreter/env/standard_env.rs    |   5 +-
 .../env/unrolled_block_env.rs                 |   5 +-
 circuit_passes/src/bucket_interpreter/mod.rs  |  16 +-
 .../src/passes/conditional_flattening.rs      | 163 +++++++++++++----
 .../src/passes/loop_unroll/body_extractor.rs  |   2 +-
 .../passes/loop_unroll/loop_env_recorder.rs   |   2 +-
 circuit_passes/src/passes/mod.rs              |  16 +-
 .../src/passes/unknown_index_sanitization.rs  |   2 +-
 14 files changed, 414 insertions(+), 316 deletions(-)

diff --git a/circom/tests/loops/inner_conditional_1.circom b/circom/tests/loops/inner_conditional_1.circom
index 85c98fca5..f76f5f361 100644
--- a/circom/tests/loops/inner_conditional_1.circom
+++ b/circom/tests/loops/inner_conditional_1.circom
@@ -14,55 +14,73 @@ template InnerConditional1(N) {
             acc -= i;
         }
     }
+    //Values at loop header per iteration
+    //  N, acc, i
+    // 10,   0, 1
+    // 10,   1, 2
+    // 10,   3, 3
+    // 10,   6, 4
+    // 10,  10, 5
+    // 10,   5, 6
+    // 10,  -1, 7
+    // 10,  -8, 8
+    // 10, -16, 9
+    // 10, -25, 10
 
     out <-- acc;
 }
 
 component main = InnerConditional1(10);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
-//CHECK-NEXT:   br label %branch1
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.F([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %fold_false1
 //CHECK-EMPTY: 
-//CHECK-NEXT: branch1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %1, i256 5)
-//CHECK-NEXT:   br i1 %call.fr_lt, label %if.then, label %if.else
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.then:
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %3, i256 %5)
-//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
-//CHECK-NEXT:   br label %if.merge
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.else:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
-//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 %10)
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_sub, i256* %11, align 4
-//CHECK-NEXT:   br label %if.merge
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.merge:
-//CHECK-NEXT:   br label %store5
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.T([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %fold_true1
 //CHECK-EMPTY: 
-//CHECK-NEXT: store5:
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
-//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %13, i256 1)
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %call.fr_add1, i256* %14, align 4
-//CHECK-NEXT:   br label %return6
+//CHECK-NEXT: fold_true1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: return6:
+//CHECK-NEXT: return3:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
@@ -89,13 +107,13 @@ component main = InnerConditional1(10);
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop4:
 //CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %4, [0 x i256]* %0)
 //CHECK-NEXT:   %5 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %5, [0 x i256]* %0)
 //CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %6, [0 x i256]* %0)
 //CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %7, [0 x i256]* %0)
 //CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
 //CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
diff --git a/circom/tests/loops/inner_conditional_4.circom b/circom/tests/loops/inner_conditional_4.circom
index 54f13c096..7480fd5e1 100644
--- a/circom/tests/loops/inner_conditional_4.circom
+++ b/circom/tests/loops/inner_conditional_4.circom
@@ -18,44 +18,52 @@ template InnerConditional4(N) {
 
 component main = InnerConditional4(6);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
-//CHECK-NEXT:   br label %branch1
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.F([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %fold_false1
 //CHECK-EMPTY: 
-//CHECK-NEXT: branch1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %1, i256 3)
-//CHECK-NEXT:   br i1 %call.fr_lt, label %if.then, label %if.else
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.then:
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
-//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
-//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %3)
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
-//CHECK-NEXT:   store i256 %call.fr_neg, i256* %4, align 4
-//CHECK-NEXT:   br label %if.merge
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.else:
-//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
-//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X2]], i32 0
-//CHECK-NEXT:   store i256 %6, i256* %7, align 4
-//CHECK-NEXT:   br label %if.merge
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.T([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %fold_true1
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.merge:
-//CHECK-NEXT:   br label %store5
+//CHECK-NEXT: fold_true1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %1)
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
-//CHECK-NEXT: store5:
-//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
-//CHECK-NEXT:   br label %return6
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: return6:
+//CHECK-NEXT: return3:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
@@ -78,13 +86,13 @@ component main = InnerConditional4(6);
 //CHECK-NEXT: unrolled_loop3:
 //CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %3, [0 x i256]* %0, i256* %4, i256* null)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %3, [0 x i256]* %0, i256* %4, i256* null)
 //CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* null)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* null)
 //CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* null)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* null)
 //CHECK-NEXT:   %9 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
 //CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* null, i256* %10)
diff --git a/circom/tests/loops/inner_conditional_7.circom b/circom/tests/loops/inner_conditional_7.circom
index 2ce359d33..43683a341 100644
--- a/circom/tests/loops/inner_conditional_7.circom
+++ b/circom/tests/loops/inner_conditional_7.circom
@@ -28,140 +28,75 @@ template InnerConditional7(N) {
 
 component main = InnerConditional7(3);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
-//CHECK-NEXT:   br label %branch1
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.F([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %fold_false1
 //CHECK-EMPTY: 
-//CHECK-NEXT: branch1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
-//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.then:
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
-//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %3)
-//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
-//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
-//CHECK-NEXT:   store i256 777, i256* %4, align 4
-//CHECK-NEXT:   br label %if.merge
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.else:
-//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
-//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 111)
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X1]], i32 0
-//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
-//CHECK-NEXT:   br label %if.merge
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.merge:
-//CHECK-NEXT:   br label %store5
-//CHECK-EMPTY: 
-//CHECK-NEXT: store5:
-//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
-//CHECK-NEXT:   br label %return6
-//CHECK-EMPTY: 
-//CHECK-NEXT: return6:
-//CHECK-NEXT:   ret void
-//CHECK-NEXT: }
-//
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
-//CHECK-NEXT:   br label %branch1
-//CHECK-EMPTY: 
-//CHECK-NEXT: branch1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
-//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.then:
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
-//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %3)
-//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
-//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
-//CHECK-NEXT:   store i256 777, i256* %4, align 4
-//CHECK-NEXT:   br label %if.merge
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.else:
-//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
-//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 111)
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X1]], i32 0
-//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
-//CHECK-NEXT:   br label %if.merge
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.merge:
-//CHECK-NEXT:   br label %store5
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %1, i256 111)
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
-//CHECK-NEXT: store5:
-//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
-//CHECK-NEXT:   br label %return6
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: return6:
+//CHECK-NEXT: return3:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
-//CHECK-NEXT:   br label %branch1
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.F([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %fold_false1
 //CHECK-EMPTY: 
-//CHECK-NEXT: branch1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
-//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %1, i256 111)
 //CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
-//CHECK-NEXT:   store i256 777, i256* %2, align 4
-//CHECK-NEXT:   br label %if.merge
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.else:
+//CHECK-NEXT: store2:
 //CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   %4 = load i256, i256* %3, align 4
-//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %4)
-//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
-//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %6)
-//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
-//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 111)
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
-//CHECK-NEXT:   store i256 %call.fr_sub, i256* %9, align 4
-//CHECK-NEXT:   br label %if.merge
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.merge:
-//CHECK-NEXT:   br label %store5
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.T([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %fold_true1
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 777, i256* %0, align 4
+//CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
-//CHECK-NEXT: store5:
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
-//CHECK-NEXT:   br label %return6
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 1)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %3, align 4
+//CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
-//CHECK-NEXT: return6:
+//CHECK-NEXT: return3:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
diff --git a/circom/tests/loops/inner_conditional_9.circom b/circom/tests/loops/inner_conditional_9.circom
index 345e26d0c..0c630d167 100644
--- a/circom/tests/loops/inner_conditional_9.circom
+++ b/circom/tests/loops/inner_conditional_9.circom
@@ -25,83 +25,91 @@ template InnerConditional9(N) {
 
 component main = InnerConditional9(4);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
-//CHECK-NEXT:   br label %branch1
-//CHECK-EMPTY: 
-//CHECK-NEXT: branch1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
-//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.then:
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.F([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %fold_false1
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
 //CHECK-NEXT:   br label %loop.cond
 //CHECK-EMPTY: 
-//CHECK-NEXT: if.else:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 0, i256* %3, align 4
-//CHECK-NEXT:   br label %loop.cond2
-//CHECK-EMPTY: 
-//CHECK-NEXT: if.merge:
-//CHECK-NEXT:   br label %store11
-//CHECK-EMPTY: 
 //CHECK-NEXT: loop.cond:
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %5, i256 4)
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %2, i256 4)
 //CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
 //CHECK-EMPTY: 
 //CHECK-NEXT: loop.body:
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %3 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %4, i256 999)
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %5, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
 //CHECK-NEXT:   %7 = load i256, i256* %6, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 999)
-//CHECK-NEXT:   %8 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 1)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
 //CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   %10 = load i256, i256* %9, align 4
 //CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %10, i256 1)
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   store i256 %call.fr_add1, i256* %11, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}.T([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %fold_true1
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %2, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %3 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 999)
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %7, i256 1)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %8, align 4
 //CHECK-NEXT:   br label %loop.cond
 //CHECK-EMPTY: 
 //CHECK-NEXT: loop.end:
-//CHECK-NEXT:   br label %if.merge
-//CHECK-EMPTY: 
-//CHECK-NEXT: loop.cond2:
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
-//CHECK-NEXT:   %call.fr_lt5 = call i1 @fr_lt(i256 %13, i256 4)
-//CHECK-NEXT:   br i1 %call.fr_lt5, label %loop.body3, label %loop.end4
-//CHECK-EMPTY: 
-//CHECK-NEXT: loop.body3:
-//CHECK-NEXT:   %14 = getelementptr i256, i256* %fix_[[X2]], i32 0
-//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
-//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %15, i256 999)
-//CHECK-NEXT:   %16 = getelementptr i256, i256* %fix_[[X1]], i32 0
-//CHECK-NEXT:   store i256 %call.fr_sub, i256* %16, align 4
-//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
-//CHECK-NEXT:   %call.fr_add6 = call i256 @fr_add(i256 %18, i256 1)
-//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 %call.fr_add6, i256* %19, align 4
-//CHECK-NEXT:   br label %loop.cond2
-//CHECK-EMPTY: 
-//CHECK-NEXT: loop.end4:
-//CHECK-NEXT:   br label %if.merge
-//CHECK-EMPTY: 
-//CHECK-NEXT: store11:
-//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   %21 = load i256, i256* %20, align 4
-//CHECK-NEXT:   %call.fr_add7 = call i256 @fr_add(i256 %21, i256 1)
-//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 %call.fr_add7, i256* %22, align 4
-//CHECK-NEXT:   br label %return12
-//CHECK-EMPTY: 
-//CHECK-NEXT: return12:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %11, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
@@ -159,13 +167,13 @@ component main = InnerConditional9(4);
 //CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i256 3
 //CHECK-NEXT:   %20 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %17, [0 x i256]* %0, i256* null, i256* null, i256* %19, i256* %21)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, i256* null, i256* null, i256* %19, i256* %21)
 //CHECK-NEXT:   %22 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0, i256 4
 //CHECK-NEXT:   %25 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %25, i32 0, i256 4
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %22, [0 x i256]* %0, i256* null, i256* null, i256* %24, i256* %26)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %22, [0 x i256]* %0, i256* null, i256* null, i256* %24, i256* %26)
 //CHECK-NEXT:   br label %store8
 //CHECK-EMPTY: 
 //CHECK-NEXT: store8:
diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 4e32a7760..28b600f44 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -3,7 +3,7 @@ use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
-use compiler::intermediate_representation::Instruction;
+use compiler::intermediate_representation::{Instruction, BucketId};
 use compiler::intermediate_representation::ir_interface::{AddressType, ValueBucket, ValueType};
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
@@ -18,6 +18,7 @@ use super::{Env, LibraryAccess};
 #[derive(Clone)]
 pub struct ExtractedFuncEnvData<'a> {
     base: Box<Env<'a>>,
+    caller: BucketId,
     remap: ToOriginalLocation,
 }
 
@@ -43,12 +44,12 @@ impl LibraryAccess for ExtractedFuncEnvData<'_> {
 //  AddressType::SubcmpSignal references created by ExtractedFunctionLocationUpdater
 //  back into the proper reference to access the correct Env entry.
 impl<'a> ExtractedFuncEnvData<'a> {
-    pub fn new(inner: Env<'a>, remap: ToOriginalLocation) -> Self {
-        ExtractedFuncEnvData { base: Box::new(inner), remap }
+    pub fn new(inner: Env<'a>, caller: &BucketId, remap: ToOriginalLocation) -> Self {
+        ExtractedFuncEnvData { base: Box::new(inner), caller: caller.clone(), remap }
     }
 
-    pub fn inside_loopbody_func_body(&self) -> bool {
-        true
+    pub fn extracted_func_caller(&self) -> Option<&BucketId> {
+        Some(&self.caller)
     }
 
     pub fn get_base(self) -> Env<'a> {
@@ -205,17 +206,17 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn set_var(self, idx: usize, value: Value) -> Self {
         // Local variables are referenced in the normal way
-        ExtractedFuncEnvData { base: Box::new(self.base.set_var(idx, value)), remap: self.remap }
+        ExtractedFuncEnvData::new(self.base.set_var(idx, value), &self.caller, self.remap)
     }
 
     pub fn set_signal(self, idx: usize, value: Value) -> Self {
         // Signals are referenced in the normal way
-        ExtractedFuncEnvData { base: Box::new(self.base.set_signal(idx, value)), remap: self.remap }
+        ExtractedFuncEnvData::new(self.base.set_signal(idx, value), &self.caller, self.remap)
     }
 
     pub fn set_all_to_unk(self) -> Self {
         // Local variables are referenced in the normal way
-        ExtractedFuncEnvData { base: Box::new(self.base.set_all_to_unk()), remap: self.remap }
+        ExtractedFuncEnvData::new(self.base.set_all_to_unk(), &self.caller, self.remap)
     }
 
     pub fn set_subcmp_to_unk(self, _subcmp_idx: usize) -> Self {
@@ -252,7 +253,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
+        ExtractedFuncEnvData::new(new_env, &self.caller, self.remap)
     }
 
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
@@ -279,7 +280,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
+        ExtractedFuncEnvData::new(new_env, &self.caller, self.remap)
     }
 
     pub fn run_subcmp(
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index 72ff2fd12..647e3b201 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -3,6 +3,7 @@ use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
+use compiler::intermediate_representation::BucketId;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::loop_unroll::body_extractor::{LoopBodyExtractor, ToOriginalLocation};
@@ -100,11 +101,11 @@ impl LibraryAccess for Env<'_> {
 }
 
 impl<'a> Env<'a> {
-    pub fn inside_loopbody_func_body(&self) -> bool {
+    pub fn extracted_func_caller(&self) -> Option<&BucketId> {
         match self {
-            Env::Standard(e) => e.inside_loopbody_func_body(),
-            Env::UnrolledBlock(e) => e.inside_loopbody_func_body(),
-            Env::ExtractedFunction(e) => e.inside_loopbody_func_body()
+            Env::Standard(e) => e.extracted_func_caller(),
+            Env::UnrolledBlock(e) => e.extracted_func_caller(),
+            Env::ExtractedFunction(e) => e.extracted_func_caller(),
         }
     }
 
@@ -116,8 +117,12 @@ impl<'a> Env<'a> {
         Env::UnrolledBlock(UnrolledBlockEnvData::new(inner, extractor))
     }
 
-    pub fn new_extracted_func_env(inner: Env<'a>, remap: ToOriginalLocation) -> Self {
-        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner, remap))
+    pub fn new_extracted_func_env(
+        inner: Env<'a>,
+        caller: &BucketId,
+        remap: ToOriginalLocation,
+    ) -> Self {
+        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner, caller, remap))
     }
 
     pub fn peel_extracted_func(self) -> Self {
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
index ec2a07e3b..d47c6be05 100644
--- a/circuit_passes/src/bucket_interpreter/env/standard_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -3,6 +3,7 @@ use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
+use compiler::intermediate_representation::BucketId;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
 use super::{SubcmpEnv, LibraryAccess};
@@ -46,8 +47,8 @@ impl<'a> StandardEnvData<'a> {
     }
 
     // READ OPERATIONS
-    pub fn inside_loopbody_func_body(&self) -> bool {
-        false
+    pub fn extracted_func_caller(&self) -> Option<&BucketId> {
+        None
     }
 
     pub fn get_var(&self, idx: usize) -> Value {
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
index 411d2ec46..7f52ca72d 100644
--- a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -3,6 +3,7 @@ use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
+use compiler::intermediate_representation::BucketId;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::loop_unroll::LOOP_BODY_FN_PREFIX;
@@ -50,8 +51,8 @@ impl<'a> UnrolledBlockEnvData<'a> {
         UnrolledBlockEnvData { base: Box::new(base), extractor }
     }
 
-    pub fn inside_loopbody_func_body(&self) -> bool {
-        false
+    pub fn extracted_func_caller(&self) -> Option<&BucketId> {
+        None
     }
 
     pub fn get_var(&self, idx: usize) -> Value {
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 18998bdcb..e3896950e 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -417,7 +417,13 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         (computed_value, env)
     }
 
-    fn run_function_loopbody<'env>(&self, name: &String, env: Env<'env>, observe: bool) -> R<'env> {
+    fn run_function_loopbody<'env>(
+        &self,
+        bucket: &'env CallBucket,
+        env: Env<'env>,
+        observe: bool,
+    ) -> R<'env> {
+        let name = &bucket.symbol;
         if cfg!(debug_assertions) {
             println!("Running function {}", name);
         };
@@ -425,9 +431,9 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
             None,
             Env::new_extracted_func_env(
                 env.clone(),
+                &bucket.id,
                 if name.starts_with(LOOP_BODY_FN_PREFIX) {
-                    self.global_data.borrow().extract_func_orig_loc[name][&env.get_vars_sort()]
-                        .clone()
+                    self.global_data.borrow().get_data_for_func(name)[&env.get_vars_sort()].clone()
                 } else {
                     Default::default()
                 },
@@ -436,7 +442,7 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         //NOTE: Do not change scope for the new interpreter because the mem lookups within
         //  `get_write_operations_in_store_bucket` need to use the original function context.
         let interp = self.mem.build_interpreter(self.global_data, self.observer);
-        let observe = observe && !interp.observer.ignore_function_calls();
+        let observe = observe && !interp.observer.ignore_loopbody_function_calls();
         let instructions = &env.get_function(name).body;
         unsafe {
             let ptr = instructions.as_ptr();
@@ -480,7 +486,7 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
             // The extracted loop body and array parameter functions can change any values in
             //  the environment via the parameters passed to it. So interpret the function and
             //  keep the resulting Env (as if the function had executed inline).
-            self.run_function_loopbody(&bucket.symbol, env, observe)
+            self.run_function_loopbody(&bucket, env, observe)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index d696d1ffc..6f3ea599f 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -1,19 +1,34 @@
 use std::cell::RefCell;
-use std::collections::BTreeMap;
+use std::collections::{HashMap, BTreeMap};
+use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
-use compiler::intermediate_representation::{InstructionPointer, new_id};
+use compiler::intermediate_representation::{InstructionPointer, new_id, BucketId};
 use compiler::intermediate_representation::ir_interface::*;
-use crate::bucket_interpreter::env::Env;
+use indexmap::IndexMap;
+use crate::bucket_interpreter::env::{Env, LibraryAccess};
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use super::{CircuitTransformationPass, GlobalPassData};
 
+type BranchValues = BTreeMap<BucketId, Option<bool>>;
+
 pub struct ConditionalFlatteningPass<'d> {
     global_data: &'d RefCell<GlobalPassData>,
-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
-    replacements: RefCell<BTreeMap<BranchBucket, bool>>,
+    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    //
+    /// Maps the ID of the CallBucket that is currently on the interpreter's stack (or None if the
+    /// interpreter is currently analyzing code that is not in one of the generated loopbody functions)
+    /// to a list of (ID, evaluated condition) pairs for the BranchBuckets in the current context.
+    evaluated_conditions: RefCell<HashMap<Option<BucketId>, BranchValues>>,
+    /// Maps CallBucket symbol (i.e. target function name) to BranchBucket value mapping to the
+    /// new function that has branches simplified according to that mapping.
+    /// NOTE: Uses IndexMap to preserve insertion order to stabilize lit test output.
+    new_functions: RefCell<IndexMap<String, BTreeMap<BranchValues, FunctionCode>>>,
+    /// Within the CircuitTransformationPass impl below, this holds the BranchBucket
+    /// condition for when the function is called by the current CallBucket.
+    caller_context: RefCell<Option<BranchValues>>,
 }
 
 impl<'d> ConditionalFlatteningPass<'d> {
@@ -21,7 +36,11 @@ impl<'d> ConditionalFlatteningPass<'d> {
         ConditionalFlatteningPass {
             global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
-            replacements: Default::default(),
+            evaluated_conditions: Default::default(),
+            new_functions: Default::default(),
+            //The None value here is for the cases that are NOT inside the loopbody functions. When
+            // traversal enters a loopbody function, this will change to the BranchValues of that CallBucket.
+            caller_context: RefCell::new(None),
         }
     }
 }
@@ -84,9 +103,18 @@ impl InterpreterObserver for ConditionalFlatteningPass<'_> {
             env.clone(),
             false,
         );
-        if cond_result.is_some() {
-            self.replacements.borrow_mut().insert(bucket.clone(), cond_result.unwrap());
-        }
+        // Store the result for the current bucket in the list for the current caller.
+        // NOTE: Store 'cond_result' even when it is None (meaning the BranchBucket
+        //  condition could not be determined) so that it will fully differentiate the
+        //  branching behavior of functions called at multiple sites.
+        let in_func = env.extracted_func_caller().map(|n| n.clone());
+        // NOTE: 'in_func' is None when the current branch is NOT located within a function
+        //  that was generated during loop unrolling to hold the body of a loop.
+        self.evaluated_conditions
+            .borrow_mut()
+            .entry(in_func)
+            .or_default()
+            .insert(bucket.id, cond_result);
         true
     }
 
@@ -124,35 +152,108 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
         self.memory.fill_from_circuit(circuit);
     }
 
+    fn post_hook_circuit(&self, cir: &mut Circuit) {
+        // Add the new functions
+        for (_, ev) in self.new_functions.borrow_mut().drain(..) {
+            for f in ev.into_values() {
+                cir.functions.push(f);
+            }
+        }
+    }
+
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
         self.memory.run_template(self.global_data, self, template);
     }
 
+    fn transform_call_bucket(&self, bucket: &CallBucket) -> InstructionPointer {
+        let call_bucket_id = Some(bucket.id);
+        // NOTE: This borrow is inside brackets to prevent runtime double borrow error.
+        let ec = { self.evaluated_conditions.borrow_mut().remove(&call_bucket_id) };
+        // The Some keys in this map are for the cases that are inside the loopbody functions.
+        if let Some(ev) = ec {
+            // If there are any conditions that evaluated to a known value, replace the
+            //  CallBucket target function with a simplified version of that function.
+            if ev.values().any(|e| e.is_some()) {
+                let mut nf = self.new_functions.borrow_mut();
+                // Check if the needed function exists, else create it.
+                let old_name = &bucket.symbol;
+                let new_name = ev.values().into_iter().fold(old_name.clone(), |acc, e| match e {
+                    Some(true) => format!("{}.T", acc),
+                    Some(false) => format!("{}.F", acc),
+                    None => format!("{}.N", acc),
+                });
+                let new_target = nf
+                    .entry(bucket.symbol.clone())
+                    .or_default()
+                    .entry(ev)
+                    .or_insert_with_key(|k| {
+                        //Set the 'caller_context' and then use self.transform_function(..)
+                        //  on the existing extracted loopbody function to create a new
+                        //  FunctionCode by running this transformer on the existing one.
+                        let old = self.caller_context.replace(Some(k.clone()));
+                        let mut res = self.transform_function(&self.memory.get_function(old_name));
+                        self.caller_context.replace(old);
+                        res.header = new_name;
+                        res
+                    })
+                    .header
+                    .clone();
+                return CallBucket {
+                    id: new_id(),
+                    source_file_id: bucket.source_file_id,
+                    line: bucket.line,
+                    message_id: bucket.message_id,
+                    symbol: new_target,
+                    argument_types: bucket.argument_types.clone(),
+                    arguments: self.transform_instructions(&bucket.arguments),
+                    arena_size: bucket.arena_size,
+                    return_info: self.transform_return_type(&bucket.return_info),
+                }
+                .allocate();
+            }
+        }
+        // Default case: no change
+        CallBucket {
+            id: new_id(),
+            source_file_id: bucket.source_file_id,
+            line: bucket.line,
+            message_id: bucket.message_id,
+            symbol: bucket.symbol.to_string(),
+            argument_types: bucket.argument_types.clone(),
+            arguments: self.transform_instructions(&bucket.arguments),
+            arena_size: bucket.arena_size,
+            return_info: self.transform_return_type(&bucket.return_info),
+        }
+        .allocate()
+    }
+
     fn transform_branch_bucket(&self, bucket: &BranchBucket) -> InstructionPointer {
-        if let Some(side) = self.replacements.borrow().get(&bucket) {
-            let code = if *side { &bucket.if_branch } else { &bucket.else_branch };
-            let block = BlockBucket {
-                id: new_id(),
-                source_file_id: bucket.source_file_id,
-                line: bucket.line,
-                message_id: bucket.message_id,
-                body: code.clone(),
-                n_iters: 1,
-                label: format!("fold_{}", side),
-            };
-            self.transform_block_bucket(&block)
-        } else {
-            BranchBucket {
-                id: new_id(),
-                source_file_id: bucket.source_file_id,
-                line: bucket.line,
-                message_id: bucket.message_id,
-                cond: self.transform_instruction(&bucket.cond),
-                if_branch: self.transform_instructions(&bucket.if_branch),
-                else_branch: self.transform_instructions(&bucket.else_branch),
+        if let Some(bv) = self.caller_context.borrow().as_ref() {
+            if let Some(Some(side)) = bv.get(&bucket.id) {
+                let code = if *side { &bucket.if_branch } else { &bucket.else_branch };
+                let block = BlockBucket {
+                    id: new_id(),
+                    source_file_id: bucket.source_file_id,
+                    line: bucket.line,
+                    message_id: bucket.message_id,
+                    body: code.clone(),
+                    n_iters: 1,
+                    label: format!("fold_{}", side),
+                };
+                return self.transform_block_bucket(&block);
             }
-            .allocate()
         }
+        // Default case: no change
+        BranchBucket {
+            id: new_id(),
+            source_file_id: bucket.source_file_id,
+            line: bucket.line,
+            message_id: bucket.message_id,
+            cond: self.transform_instruction(&bucket.cond),
+            if_branch: self.transform_instructions(&bucket.if_branch),
+            else_branch: self.transform_instructions(&bucket.else_branch),
+        }
+        .allocate()
     }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index 3bfef2689..cc52854e3 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -300,7 +300,7 @@ impl LoopBodyExtractor {
         // NOTE: starts at 2 because the current component's signal arena and lvars are first.
         let mut next_idx: FuncArgIdx = 2;
         // First step is to collect all location references into the 'bucket_to_itr_to_ref' table.
-        // NOTE: collect to IndexSet to preserve insertion order to stabilize test output.
+        // NOTE: Uses IndexSet to preserve insertion order to stabilize lit test output.
         let all_loadstore_bucket_ids: IndexSet<&BucketId> =
             vpi.values().flat_map(|x| x.loadstore_to_index.keys()).collect();
         for id in all_loadstore_bucket_ids {
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 9781e9b48..9adfca74e 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -15,7 +15,7 @@ use super::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
 pub struct VariableValues<'a> {
     pub env_at_header: Env<'a>,
     /// The key is the ID of the load/store bucket where the reference is located.
-    /// NOTE: uses IndexMap to preserve insertion order to stabilize test output.
+    /// NOTE: Uses IndexMap to preserve insertion order to stabilize lit test output.
     pub loadstore_to_index: IndexMap<BucketId, (AddressType, Value)>,
 }
 
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 5e105b757..bc4c39fda 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -429,13 +429,27 @@ pub struct GlobalPassData {
     /// (from Env::get_vars_sort) to location reference in the original function. Used
     /// by ExtractedFuncEnvData to access the original function's Env via the extracted
     /// function's parameter references.
-    pub extract_func_orig_loc: HashMap<String, BTreeMap<UnrolledIterLvars, ToOriginalLocation>>,
+    extract_func_orig_loc: HashMap<String, BTreeMap<UnrolledIterLvars, ToOriginalLocation>>,
 }
 
 impl GlobalPassData {
     pub fn new() -> GlobalPassData {
         GlobalPassData { extract_func_orig_loc: Default::default() }
     }
+
+    pub fn get_data_for_func(
+        &self,
+        name: &String,
+    ) -> &BTreeMap<UnrolledIterLvars, ToOriginalLocation> {
+        match self.extract_func_orig_loc.get(name) {
+            Some(x) => x,
+            None => {
+                // Allow for the suffix(es) added by ConditionalFlatteningPass
+                let name = name.trim_end_matches(&['.', 'T', 'F', 'N']);
+                self.extract_func_orig_loc.get(name).unwrap()
+            }
+        }
+    }
 }
 
 pub struct PassManager {
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index 229bea8a2..e60a75274 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -254,7 +254,7 @@ impl InterpreterObserver for UnknownIndexSanitizationPass<'_> {
     }
 
     fn ignore_loopbody_function_calls(&self) -> bool {
-        true // ?
+        true
     }
 }
 

From 59ccc3bc97d96fd2ad2c6c856bc35ad58315e581 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 3 Oct 2023 15:44:24 -0500
Subject: [PATCH 05/22] [VAN-670] generate and flatten subcmp counter/run

---
 circom/tests/loops/assign_in_loop.circom      | 186 +++++++
 circom/tests/loops/call_inside_loop.circom    |  35 +-
 .../tests/loops/inner_conditional_10.circom   |   4 +
 .../tests/loops/inner_conditional_11.circom   | 142 ++++++
 circom/tests/subcmps/mapped.circom            |   3 +
 circom/tests/subcmps/mapped2.circom           |   3 +
 circom/tests/subcmps/subcmps0A.circom         |  45 +-
 circom/tests/subcmps/subcmps0B.circom         |  53 +-
 circom/tests/subcmps/subcmps0C.circom         |  45 +-
 circom/tests/subcmps/subcmps0D.circom         |  66 ++-
 circom/tests/subcmps/subcmps1.circom          |  51 +-
 circom/tests/subcmps/subcmps2.circom          | 468 +++++++++---------
 circom/tests/subcmps/subcmps3.circom          | 231 +++++++++
 .../env/extracted_func_env.rs                 | 197 +++++---
 .../src/bucket_interpreter/env/mod.rs         |  35 +-
 .../bucket_interpreter/env/standard_env.rs    |  32 +-
 .../env/unrolled_block_env.rs                 |  11 +
 circuit_passes/src/bucket_interpreter/mod.rs  |  31 +-
 .../src/passes/conditional_flattening.rs      |  36 +-
 .../src/passes/loop_unroll/body_extractor.rs  | 108 ++--
 .../loop_unroll/extracted_location_updater.rs | 241 ++++++---
 .../passes/loop_unroll/loop_env_recorder.rs   |  33 +-
 circuit_passes/src/passes/loop_unroll/mod.rs  |  20 +
 circuit_passes/src/passes/mod.rs              |   7 +-
 code_producers/src/llvm_elements/functions.rs |  29 +-
 code_producers/src/llvm_elements/mod.rs       |   1 +
 code_producers/src/llvm_elements/stdlib.rs    |  16 +-
 code_producers/src/llvm_elements/template.rs  |   1 +
 .../ir_interface.rs                           |  30 ++
 .../load_bucket.rs                            |   4 +-
 .../store_bucket.rs                           |  76 ++-
 31 files changed, 1625 insertions(+), 615 deletions(-)
 create mode 100644 circom/tests/loops/assign_in_loop.circom
 create mode 100644 circom/tests/loops/inner_conditional_11.circom
 create mode 100644 circom/tests/subcmps/subcmps3.circom

diff --git a/circom/tests/loops/assign_in_loop.circom b/circom/tests/loops/assign_in_loop.circom
new file mode 100644
index 000000000..bcfa191a6
--- /dev/null
+++ b/circom/tests/loops/assign_in_loop.circom
@@ -0,0 +1,186 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.* // panicked at 'not yet implemented', circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs:149:44 (LocationRule::Mapped case)
+
+template Inner(i) {
+    signal input in;
+    signal output out;
+    
+    out <-- (in >> i) & 1;
+}
+
+template Num2Bits(n) {
+    signal input in;
+    signal output out[n];
+    
+    component c[n];
+    for (var i = 0; i < n; i++) {
+    	c[i] = Inner(i);
+    	c[i].in <-- in;
+    	out[i] <-- c[i].out;
+    }
+}
+
+component main = Num2Bits(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]],
+//CHECK-SAME: [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X3]], i256* %subc_[[X3]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
+//CHECK-NEXT:   call void @Inner_?_run([0 x i256]* %sub_[[X1]])                 //TODO: which function to call depends on which iteration of the loop
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X3]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Inner_0_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_0_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_1_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_1_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_2_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_2_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Num2Bits_3_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [4 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 1, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [4 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Num2Bits_3_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [3 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Inner_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %create_cmp3
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp3:
+//CHECK-NEXT:   %3 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Inner_1_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   br label %create_cmp4
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp4:
+//CHECK-NEXT:   %4 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
+//CHECK-NEXT:   call void @Inner_2_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop6
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop6:
+//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %12 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
+//CHECK-NEXT:   %16 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
+//CHECK-NEXT:   %19 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
+//CHECK-NEXT:   %21 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
+//CHECK-NEXT:   %24 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20, [0 x i256]* %23, i256* %25)
+//CHECK-NEXT:   %26 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
+//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 1
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %32 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %33 = load [0 x i256]*, [0 x i256]** %32, align 8
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %33, i32 0
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0, i256 0
+//CHECK-NEXT:   %36 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %37 = load [0 x i256]*, [0 x i256]** %36, align 8
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0
+//CHECK-NEXT:   %39 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %40 = bitcast i32* %39 to i256*
+//CHECK-NEXT:   %41 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %42 = load [0 x i256]*, [0 x i256]** %41, align 8
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %42, i32 0
+//CHECK-NEXT:   %44 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %45 = bitcast i32* %44 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %26, [0 x i256]* %0, i256* %30, i256* %31, i256* %35, [0 x i256]* %38, i256* %40, [0 x i256]* %43, i256* %45)
+//CHECK-NEXT:   %46 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %47 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %48 = load [0 x i256]*, [0 x i256]** %47, align 8
+//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %48, i32 0
+//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %49, i32 0, i256 1
+//CHECK-NEXT:   %51 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %52 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %53 = load [0 x i256]*, [0 x i256]** %52, align 8
+//CHECK-NEXT:   %54 = getelementptr [0 x i256], [0 x i256]* %53, i32 0
+//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %54, i32 0, i256 0
+//CHECK-NEXT:   %56 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %57 = load [0 x i256]*, [0 x i256]** %56, align 8
+//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %57, i32 0
+//CHECK-NEXT:   %59 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %60 = bitcast i32* %59 to i256*
+//CHECK-NEXT:   %61 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %62 = load [0 x i256]*, [0 x i256]** %61, align 8
+//CHECK-NEXT:   %63 = getelementptr [0 x i256], [0 x i256]* %62, i32 0
+//CHECK-NEXT:   %64 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %65 = bitcast i32* %64 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %46, [0 x i256]* %0, i256* %50, i256* %51, i256* %55, [0 x i256]* %58, i256* %60, [0 x i256]* %63, i256* %65)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/call_inside_loop.circom b/circom/tests/loops/call_inside_loop.circom
index 912ce3287..3a4375cd7 100644
--- a/circom/tests/loops/call_inside_loop.circom
+++ b/circom/tests/loops/call_inside_loop.circom
@@ -70,7 +70,7 @@ component main = CallInLoop(2, 3);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
 //CHECK-NEXT:   br label %call1
 //CHECK-EMPTY: 
@@ -95,21 +95,16 @@ component main = CallInLoop(2, 3);
 //CHECK-NEXT:   store i256 3, i256* %8, align 4
 //CHECK-NEXT:   %9 = bitcast [15 x i256]* %fun_0_arena to i256*
 //CHECK-NEXT:   %call.fun_0 = call i256 @fun_0(i256* %9)
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
-//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %11)
-//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
-//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 4
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
-//CHECK-NEXT:   store i256 %call.fun_0, i256* %12, align 4
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fun_0, i256* %10, align 4
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 1)
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 1)
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %13, align 4
 //CHECK-NEXT:   br label %return3
 //CHECK-EMPTY: 
 //CHECK-NEXT: return3:
@@ -175,16 +170,20 @@ component main = CallInLoop(2, 3);
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop10:
 //CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0)
 //CHECK-NEXT:   %16 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %16, [0 x i256]* %0)
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %16, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0, i256* %17)
+//CHECK-NEXT:   %18 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0, i256 5
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %18, [0 x i256]* %0, i256* %20)
 //CHECK-NEXT:   br label %store11
 //CHECK-EMPTY: 
 //CHECK-NEXT: store11:
-//CHECK-NEXT:   %17 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
-//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:   store i256 %18, i256* %19, align 4
+//CHECK-NEXT:   %21 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %22 = load i256, i256* %21, align 4
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %22, i256* %23, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_conditional_10.circom b/circom/tests/loops/inner_conditional_10.circom
index 066ad245c..27cf80562 100644
--- a/circom/tests/loops/inner_conditional_10.circom
+++ b/circom/tests/loops/inner_conditional_10.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.* // TODO: branch conditions are not be flattened fully for some reason
 
 template Sigma() {
     signal input inp;
@@ -12,6 +13,9 @@ template Poseidon() {
 
     component sigmaF[2];
 
+    // NOTE: When processing the loop, the statements indexed with 'k' are determined
+    //  NOT safe to move into a new function since 'k' is unknown. That results in
+    //  the loop unrolling in place.
     for (var i=0; i<4; i++) {
         if (i < 1 || i >= 3) {
             var k = i < 1 ? 0 : 1;
diff --git a/circom/tests/loops/inner_conditional_11.circom b/circom/tests/loops/inner_conditional_11.circom
new file mode 100644
index 000000000..a18a080ee
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_11.circom
@@ -0,0 +1,142 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Sigma() {
+    signal input inp;
+    signal output out;
+}
+
+// Equivalent to inner_conditional_10 but refactored to allow the loop body to be extracted to a new function. 
+template Poseidon() {
+    signal input inp;
+
+    component sigmaF[2];
+
+    for (var i=0; i<4; i++) {
+        if (i < 1) {
+            sigmaF[0] = Sigma();
+            sigmaF[0].inp <== inp;
+        } else if (i >= 3) {
+            sigmaF[1] = Sigma();
+            sigmaF[1].inp <== inp;
+        }
+    }
+}
+
+component main = Poseidon();
+
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.F\.F}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.F\.F]]:
+//CHECK-NEXT:   br label %fold_false1
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %1, i256 1)
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %2, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.F\.T}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.F\.T]]:
+//CHECK-NEXT:   br label %fold_false1
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_false1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 1)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME:  i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %fold_true1
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 1)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Poseidon_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Sigma_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Sigma_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %4 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %4, [0 x i256]* %0, i256* %8, i256* null)
+//CHECK-NEXT:   %9 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* null, i256* null)
+//CHECK-NEXT:   %10 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, i256* null, i256* null)
+//CHECK-NEXT:   %11 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %11, [0 x i256]* %0, i256* null, i256* %15)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index 69c44a397..b089cb571 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -41,3 +41,6 @@ template B(n) {
 }
 
 component main = B(2);
+
+//TODO: This check is a filler just to capture when the test no longer crashes
+//CHECK: declare void @llvm.donothing()
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index 41b3d479c..2e542d7a1 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -57,3 +57,6 @@ template B(n, m, j) {
 }
 
 component main = B(2, 3, 2);
+
+//TODO: This check is a filler just to capture when the test no longer crashes
+//CHECK: declare void @llvm.donothing()
diff --git a/circom/tests/subcmps/subcmps0A.circom b/circom/tests/subcmps/subcmps0A.circom
index 4f33801b3..80e497bc3 100644
--- a/circom/tests/subcmps/subcmps0A.circom
+++ b/circom/tests/subcmps/subcmps0A.circom
@@ -24,10 +24,10 @@ template SubCmps0A(n) {
 
 component main = SubCmps0A(2);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
-//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals, 
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.T]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -38,26 +38,33 @@ component main = SubCmps0A(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
-//CHECK-NEXT:   br label %store3
+//CHECK-NEXT:   %3 = load i256, i256* %subc_[[X4]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 1)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subc_[[X4]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %4, align 4
+//CHECK-NEXT:   br label %fold_true3
 //CHECK-EMPTY: 
-//CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
-//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT: fold_true3:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
-//CHECK-NEXT:   br label %return5
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
 //CHECK-EMPTY: 
-//CHECK-NEXT: return5:
+//CHECK-NEXT: return6:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
diff --git a/circom/tests/subcmps/subcmps0B.circom b/circom/tests/subcmps/subcmps0B.circom
index f858f60e8..c73f889ab 100644
--- a/circom/tests/subcmps/subcmps0B.circom
+++ b/circom/tests/subcmps/subcmps0B.circom
@@ -25,10 +25,10 @@ template SubCmps0B(n) {
 
 component main = SubCmps0B(2);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
-//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals, 
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.T]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -39,33 +39,40 @@ component main = SubCmps0B(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
-//CHECK-NEXT:   br label %store3
+//CHECK-NEXT:   %3 = load i256, i256* %subc_[[X5]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 1)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subc_[[X5]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %4, align 4
+//CHECK-NEXT:   br label %fold_true3
 //CHECK-EMPTY: 
-//CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
-//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT: fold_true3:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %subfix_[[X5]], i32 0
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %8, i256* %9, align 4
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
 //CHECK-NEXT:   br label %store5
 //CHECK-EMPTY: 
 //CHECK-NEXT: store5:
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
-//CHECK-NEXT:   br label %return6
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %subfix_[[X5]], i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %9, i256* %10, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 1)
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %13, align 4
+//CHECK-NEXT:   br label %return7
 //CHECK-EMPTY: 
-//CHECK-NEXT: return6:
+//CHECK-NEXT: return7:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
diff --git a/circom/tests/subcmps/subcmps0C.circom b/circom/tests/subcmps/subcmps0C.circom
index c62cdff92..ecf098e96 100644
--- a/circom/tests/subcmps/subcmps0C.circom
+++ b/circom/tests/subcmps/subcmps0C.circom
@@ -23,10 +23,10 @@ template SubCmps0C(n) {
 
 component main = SubCmps0C(2);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
-//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals, 
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.T]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -37,26 +37,33 @@ component main = SubCmps0C(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
-//CHECK-NEXT:   br label %store3
+//CHECK-NEXT:   %3 = load i256, i256* %subc_[[X4]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 1)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subc_[[X4]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %4, align 4
+//CHECK-NEXT:   br label %fold_true3
 //CHECK-EMPTY: 
-//CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
-//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT: fold_true3:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
-//CHECK-NEXT:   br label %return5
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
 //CHECK-EMPTY: 
-//CHECK-NEXT: return5:
+//CHECK-NEXT: return6:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
diff --git a/circom/tests/subcmps/subcmps0D.circom b/circom/tests/subcmps/subcmps0D.circom
index c2eeee5f5..c9dee689b 100644
--- a/circom/tests/subcmps/subcmps0D.circom
+++ b/circom/tests/subcmps/subcmps0D.circom
@@ -24,10 +24,10 @@ template SubCmps0D(n) {
 
 component main = SubCmps0D(3);
 
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]],
-//CHECK-SAME: i256* %fix_[[X4:[0-9]+]], i256* %fix_[[X5:[0-9]+]], i256* %subfix_[[X6:[0-9]+]], [0 x i256]* %sub_[[X6]], i256* %subc_[[X6]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.F\.T}}([0 x i256]* %lvars, [0 x i256]* %signals, 
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X5:[0-9]+]], i256* %subfix_[[X6:[0-9]+]], [0 x i256]* %sub_[[X6]], i256* %subc_[[X6]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.F\.T]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -38,38 +38,50 @@ component main = SubCmps0D(3);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
-//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
-//CHECK-NEXT:   br label %store3
+//CHECK-NEXT:   %3 = load i256, i256* %subc_[[X6]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 1)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subc_[[X6]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %4, align 4
+//CHECK-NEXT:   br label %fold_false3
 //CHECK-EMPTY: 
-//CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X4]], i32 0
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %subfix_[[X3]], i32 0
-//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT: fold_false3:
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
-//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %subfix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
 //CHECK-NEXT:   br label %store5
 //CHECK-EMPTY: 
 //CHECK-NEXT: store5:
-//CHECK-NEXT:   %8 = getelementptr i256, i256* %subfix_[[X6]], i32 0
-//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
-//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X5]], i32 0
-//CHECK-NEXT:   store i256 %9, i256* %10, align 4
-//CHECK-NEXT:   br label %store6
-//CHECK-EMPTY: 
-//CHECK-NEXT: store6:
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 1)
+//CHECK-NEXT:   %8 = load i256, i256* %subc_[[X6]], align 4
+//CHECK-NEXT:   %call.fr_sub1 = call i256 @fr_sub(i256 %8, i256 1)
+//CHECK-NEXT:   %9 = getelementptr i256, i256* %subc_[[X6]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub1, i256* %9, align 4
+//CHECK-NEXT:   br label %fold_true6
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true6:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %subfix_[[X6]], i32 0
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %12 = getelementptr i256, i256* %fix_[[X5]], i32 0
+//CHECK-NEXT:   store i256 %11, i256* %12, align 4
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %13, align 4
-//CHECK-NEXT:   br label %return7
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %return9
 //CHECK-EMPTY: 
-//CHECK-NEXT: return7:
+//CHECK-NEXT: return9:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
diff --git a/circom/tests/subcmps/subcmps1.circom b/circom/tests/subcmps/subcmps1.circom
index 883d3d290..a38ddc024 100644
--- a/circom/tests/subcmps/subcmps1.circom
+++ b/circom/tests/subcmps/subcmps1.circom
@@ -34,10 +34,10 @@ component main = SubCmps1(3);
 // %lvars =  [ n, i ]
 // %subcmps = [ IsZero[0]{signals=[out,in,inv]}, IsZero[1]{SAME} ]
 //
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
-//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID]]:
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals, 
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID:[0-9]+\.T]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -51,29 +51,36 @@ component main = SubCmps1(3);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
-//CHECK-NEXT:   br label %store3
+//CHECK-NEXT:   %4 = load i256, i256* %subc_[[X4]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subc_[[X4]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %5, align 4
+//CHECK-NEXT:   br label %fold_true3
 //CHECK-EMPTY: 
-//CHECK-NEXT: store3:
-//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X4]], i32 0
-//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
-//CHECK-NEXT:   store i256 %6, i256* %7, align 4
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %constraint1 = alloca i1, align 1
-//CHECK-NEXT:   call void @__constraint_values(i256 %6, i256 %8, i1* %constraint1)
+//CHECK-NEXT: fold_true3:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 1)
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %11, align 4
-//CHECK-NEXT:   br label %return5
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %7, i256* %8, align 4
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %constraint1 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %7, i256 %9, i1* %constraint1)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %return6
 //CHECK-EMPTY: 
-//CHECK-NEXT: return5:
+//CHECK-NEXT: return6:
 //CHECK-NEXT:   ret void
 //CHECK-NEXT: }
 //
diff --git a/circom/tests/subcmps/subcmps2.circom b/circom/tests/subcmps/subcmps2.circom
index 23f932c8e..e7a6eaa04 100644
--- a/circom/tests/subcmps/subcmps2.circom
+++ b/circom/tests/subcmps/subcmps2.circom
@@ -35,226 +35,248 @@ template Caller() {
 
 component main = Caller();
 
-//CHECK-LABEL: define void @Caller_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %0)
-//CHECK: %[[CALL_VAL:call\.nop_[0-3]]] = call i256 @nop_{{[0-3]}}(i256* %6)
-//CHECK: %[[SUBCMP_PTR:.*]] = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 {{[0-3]}}
-//CHECK: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUBCMP_PTR]]
-//CHECK: %[[SUBCMP_INP:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 {{[1-4]}}
-//CHECK: store i256 %[[CALL_VAL]], i256* %[[SUBCMP_INP]]
-
-/*
-define void @Sum_0_build({ [0 x i256]*, i32 }* %0) !dbg !9 {
-main:
-  %1 = alloca [5 x i256], align 8
-  %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
-  store i32 4, i32* %2, align 4
-  %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
-  %4 = bitcast [5 x i256]* %1 to [0 x i256]*
-  store [0 x i256]* %4, [0 x i256]** %3, align 8
-  ret void
-}
-
-define void @Sum_0_run([0 x i256]* %0) !dbg !11 {
-prelude:
-  %lvars = alloca [3 x i256], align 8
-  %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
-  br label %store1
-
-store1:
-  %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
-  store i256 4, i256* %1, align 4
-  br label %store2
-
-store2:
-  %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  store i256 0, i256* %2, align 4
-  br label %store3
-
-store3:
-  %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-  store i256 0, i256* %3, align 4
-  br label %unrolled_loop4
-
-unrolled_loop4:
-  %4 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  %5 = load i256, i256* %4, align 4
-  %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-  %7 = load i256, i256* %6, align 4
-  %call.fr_add = call i256 @fr_add(i256 %5, i256 %7)
-  %8 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  store i256 %call.fr_add, i256* %8, align 4
-  %9 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-  store i256 1, i256* %9, align 4
-  %10 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  %11 = load i256, i256* %10, align 4
-  %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-  %13 = load i256, i256* %12, align 4
-  %call.fr_add1 = call i256 @fr_add(i256 %11, i256 %13)
-  %14 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  store i256 %call.fr_add1, i256* %14, align 4
-  %15 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-  store i256 2, i256* %15, align 4
-  %16 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  %17 = load i256, i256* %16, align 4
-  %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
-  %19 = load i256, i256* %18, align 4
-  %call.fr_add2 = call i256 @fr_add(i256 %17, i256 %19)
-  %20 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  store i256 %call.fr_add2, i256* %20, align 4
-  %21 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-  store i256 3, i256* %21, align 4
-  %22 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  %23 = load i256, i256* %22, align 4
-  %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
-  %25 = load i256, i256* %24, align 4
-  %call.fr_add3 = call i256 @fr_add(i256 %23, i256 %25)
-  %26 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  store i256 %call.fr_add3, i256* %26, align 4
-  %27 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-  store i256 4, i256* %27, align 4
-  br label %store5
-
-store5:
-  %28 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-  %29 = load i256, i256* %28, align 4
-  %30 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-  store i256 %29, i256* %30, align 4
-  %31 = load i256, i256* %30, align 4
-  %constraint = alloca i1, align 1
-  call void @__constraint_values(i256 %29, i256 %31, i1* %constraint)
-  br label %prologue
-
-prologue:
-  ret void
-}
-
-define void @Caller_1_build({ [0 x i256]*, i32 }* %0) !dbg !18 {
-main:
-  %1 = alloca [5 x i256], align 8
-  %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
-  store i32 4, i32* %2, align 4
-  %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
-  %4 = bitcast [5 x i256]* %1 to [0 x i256]*
-  store [0 x i256]* %4, [0 x i256]** %3, align 8
-  ret void
-}
-
-define void @Caller_1_run([0 x i256]* %0) !dbg !20 {
-prelude:
-  %lvars = alloca [1 x i256], align 8
-  %subcmps = alloca [1 x { [0 x i256]*, i32 }], align 8
-  br label %create_cmp1
-
-create_cmp1:
-  %1 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
-  call void @Sum_0_build({ [0 x i256]*, i32 }* %1)
-  br label %store2
-
-store2:
-  %2 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
-  store i256 0, i256* %2, align 4
-  br label %unrolled_loop3
-
-unrolled_loop3:
-  %nop_0_arena = alloca [1 x i256], align 8
-  %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-  %4 = load i256, i256* %3, align 4
-  %5 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena, i32 0, i32 0
-  store i256 %4, i256* %5, align 4
-  %6 = bitcast [1 x i256]* %nop_0_arena to i256*
-  %call.nop_0 = call i256 @nop_0(i256* %6)
-  %7 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-  %8 = load [0 x i256]*, [0 x i256]** %7, align 8
-  %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
-  store i256 %call.nop_0, i256* %9, align 4
-  %10 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-  %load.subcmp.counter = load i32, i32* %10, align 4
-  %decrement.counter = sub i32 %load.subcmp.counter, 1
-  store i32 %decrement.counter, i32* %10, align 4
-  %11 = load i256, i256* %9, align 4
-  %constraint = alloca i1, align 1
-  call void @__constraint_values(i256 %call.nop_0, i256 %11, i1* %constraint)
-  %12 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
-  store i256 1, i256* %12, align 4
-  %nop_0_arena1 = alloca [1 x i256], align 8
-  %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-  %14 = load i256, i256* %13, align 4
-  %15 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena1, i32 0, i32 0
-  store i256 %14, i256* %15, align 4
-  %16 = bitcast [1 x i256]* %nop_0_arena1 to i256*
-  %call.nop_02 = call i256 @nop_0(i256* %16)
-  %17 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-  %18 = load [0 x i256]*, [0 x i256]** %17, align 8
-  %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i32 2
-  store i256 %call.nop_02, i256* %19, align 4
-  %20 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-  %load.subcmp.counter3 = load i32, i32* %20, align 4
-  %decrement.counter4 = sub i32 %load.subcmp.counter3, 1
-  store i32 %decrement.counter4, i32* %20, align 4
-  %21 = load i256, i256* %19, align 4
-  %constraint5 = alloca i1, align 1
-  call void @__constraint_values(i256 %call.nop_02, i256 %21, i1* %constraint5)
-  %22 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
-  store i256 2, i256* %22, align 4
-  %nop_0_arena6 = alloca [1 x i256], align 8
-  %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
-  %24 = load i256, i256* %23, align 4
-  %25 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena6, i32 0, i32 0
-  store i256 %24, i256* %25, align 4
-  %26 = bitcast [1 x i256]* %nop_0_arena6 to i256*
-  %call.nop_07 = call i256 @nop_0(i256* %26)
-  %27 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-  %28 = load [0 x i256]*, [0 x i256]** %27, align 8
-  %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0, i32 3
-  store i256 %call.nop_07, i256* %29, align 4
-  %30 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-  %load.subcmp.counter8 = load i32, i32* %30, align 4
-  %decrement.counter9 = sub i32 %load.subcmp.counter8, 1
-  store i32 %decrement.counter9, i32* %30, align 4
-  %31 = load i256, i256* %29, align 4
-  %constraint10 = alloca i1, align 1
-  call void @__constraint_values(i256 %call.nop_07, i256 %31, i1* %constraint10)
-  %32 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
-  store i256 3, i256* %32, align 4
-  %nop_0_arena11 = alloca [1 x i256], align 8
-  %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
-  %34 = load i256, i256* %33, align 4
-  %35 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena11, i32 0, i32 0
-  store i256 %34, i256* %35, align 4
-  %36 = bitcast [1 x i256]* %nop_0_arena11 to i256*
-  %call.nop_012 = call i256 @nop_0(i256* %36)
-  %37 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-  %38 = load [0 x i256]*, [0 x i256]** %37, align 8
-  %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0, i32 4
-  store i256 %call.nop_012, i256* %39, align 4
-  %40 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-  %load.subcmp.counter13 = load i32, i32* %40, align 4
-  %decrement.counter14 = sub i32 %load.subcmp.counter13, 1
-  store i32 %decrement.counter14, i32* %40, align 4
-  %41 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-  %42 = load [0 x i256]*, [0 x i256]** %41, align 8
-  call void @Sum_0_run([0 x i256]* %42)
-  %43 = load i256, i256* %39, align 4
-  %constraint15 = alloca i1, align 1
-  call void @__constraint_values(i256 %call.nop_012, i256 %43, i1* %constraint15)
-  %44 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
-  store i256 4, i256* %44, align 4
-  br label %store4
-
-store4:
-  %45 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-  %46 = load [0 x i256]*, [0 x i256]** %45, align 8
-  %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i32 0
-  %48 = load i256, i256* %47, align 4
-  %49 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-  store i256 %48, i256* %49, align 4
-  %50 = load i256, i256* %49, align 4
-  %constraint16 = alloca i1, align 1
-  call void @__constraint_values(i256 %48, i256 %50, i1* %constraint16)
-  br label %prologue
-
-prologue:
-  ret void
-}
-*/
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.F}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], [0 x i256]* %sub_[[X3:[0-9]+]], i256* %subc_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %call1
+//CHECK-EMPTY: 
+//CHECK-NEXT: call1:
+//CHECK-NEXT:   %nop_0_arena = alloca [1 x i256], align 8
+//CHECK-NEXT:   %0 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena, i32 0, i32 0
+//CHECK-NEXT:   %1 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   store i256 %2, i256* %0, align 4
+//CHECK-NEXT:   %3 = bitcast [1 x i256]* %nop_0_arena to i256*
+//CHECK-NEXT:   %call.nop_0 = call i256 @nop_0(i256* %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.nop_0, i256* %4, align 4
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %call.nop_0, i256 %5, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %6 = load i256, i256* %subc_[[X4]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %subc_[[X4]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
+//CHECK-NEXT:   br label %fold_false3
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_false3:
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], [0 x i256]* %sub_[[X3:[0-9]+]], i256* %subc_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %call1
+//CHECK-EMPTY: 
+//CHECK-NEXT: call1:
+//CHECK-NEXT:   %nop_0_arena = alloca [1 x i256], align 8
+//CHECK-NEXT:   %0 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena, i32 0, i32 0
+//CHECK-NEXT:   %1 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   store i256 %2, i256* %0, align 4
+//CHECK-NEXT:   %3 = bitcast [1 x i256]* %nop_0_arena to i256*
+//CHECK-NEXT:   %call.nop_0 = call i256 @nop_0(i256* %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.nop_0, i256* %4, align 4
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %call.nop_0, i256 %5, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %6 = load i256, i256* %subc_[[X4]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %subc_[[X4]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
+//CHECK-NEXT:   br label %fold_true3
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true3:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @Sum_0_run([0 x i256]* %sub_[[X3]])
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Sum_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %12 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %13, i256* %14, align 4
+//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %13, i256 %15, i1* %constraint)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Caller_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [1 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Sum_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %3 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %5 = load [0 x i256]*, [0 x i256]** %4, align 8
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %5, i32 0
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %10 = load [0 x i256]*, [0 x i256]** %9, align 8
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0
+//CHECK-NEXT:   %12 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %13 = bitcast i32* %12 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %3, [0 x i256]* %0, i256* %7, i256* %8, [0 x i256]* %11, i256* %13)
+//CHECK-NEXT:   %14 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %16 = load [0 x i256]*, [0 x i256]** %15, align 8
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %16, i32 0
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0, i256 2
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %20 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %21 = load [0 x i256]*, [0 x i256]** %20, align 8
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %21, i32 0
+//CHECK-NEXT:   %23 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %24 = bitcast i32* %23 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %18, i256* %19, [0 x i256]* %22, i256* %24)
+//CHECK-NEXT:   %25 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %27 = load [0 x i256]*, [0 x i256]** %26, align 8
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %27, i32 0
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0, i256 3
+//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %31 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %32 = load [0 x i256]*, [0 x i256]** %31, align 8
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %32, i32 0
+//CHECK-NEXT:   %34 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %35 = bitcast i32* %34 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %25, [0 x i256]* %0, i256* %29, i256* %30, [0 x i256]* %33, i256* %35)
+//CHECK-NEXT:   %36 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %37 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %38 = load [0 x i256]*, [0 x i256]** %37, align 8
+//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0, i256 4
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %42 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
+//CHECK-NEXT:   %45 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %46 = bitcast i32* %45 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %36, [0 x i256]* %0, i256* %40, i256* %41, [0 x i256]* %44, i256* %46)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %47 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %48 = load [0 x i256]*, [0 x i256]** %47, align 8
+//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %48, i32 0, i32 0
+//CHECK-NEXT:   %50 = load i256, i256* %49, align 4
+//CHECK-NEXT:   %51 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %50, i256* %51, align 4
+//CHECK-NEXT:   %52 = load i256, i256* %51, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %50, i256 %52, i1* %constraint)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps3.circom b/circom/tests/subcmps/subcmps3.circom
new file mode 100644
index 000000000..63eac590b
--- /dev/null
+++ b/circom/tests/subcmps/subcmps3.circom
@@ -0,0 +1,231 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Sum(n) {
+    signal input inp[n];
+    signal output outp;
+
+    var s = 0;
+
+    for (var i = 0; i < n; i++) {
+        s += inp[i];
+    }
+
+    outp <== s;
+}
+
+template SubCmps3() {
+    signal input inp[4];
+    signal output outp;
+
+    component s = Sum(4);
+
+    for (var i = 0; i < 4; i++) {
+        s.inp[i] <== inp[i];
+        if (i == 3) {
+            outp <== s.outp;
+        }
+    }
+}
+
+component main = SubCmps3();
+
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.F}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+\.F]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %fold_false2
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_false2:
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 1)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %return4
+//CHECK-EMPTY: 
+//CHECK-NEXT: return4:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %fold_true2
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true2:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %constraint1 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %5, i256 %7, i1* %constraint1)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return4
+//CHECK-EMPTY: 
+//CHECK-NEXT: return4:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Sum_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %12 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %13, i256* %14, align 4
+//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %13, i256 %15, i1* %constraint)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps3_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [1 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Sum_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %3 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %5 = load [0 x i256]*, [0 x i256]** %4, align 8
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %5, i32 0
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %3, [0 x i256]* %0, i256* %7, i256* %8, i256* null)
+//CHECK-NEXT:   %9 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %11 = load [0 x i256]*, [0 x i256]** %10, align 8
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %11, i32 0
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i256 2
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %9, [0 x i256]* %0, i256* %13, i256* %14, i256* null)
+//CHECK-NEXT:   %15 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i256 3
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0, i256* %19, i256* %20, i256* null)
+//CHECK-NEXT:   %21 = bitcast [1 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %22 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %23 = load [0 x i256]*, [0 x i256]** %22, align 8
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %24, i32 0, i256 4
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %27 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
+//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %21, [0 x i256]* %0, i256* %25, i256* %26, i256* %30)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 28b600f44..24251ed66 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -1,5 +1,5 @@
 use std::cell::Ref;
-use std::collections::{HashMap, BTreeMap};
+use std::collections::{HashMap, BTreeMap, HashSet};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
@@ -7,7 +7,7 @@ use compiler::intermediate_representation::{Instruction, BucketId};
 use compiler::intermediate_representation::ir_interface::{AddressType, ValueBucket, ValueType};
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::loop_unroll::body_extractor::ToOriginalLocation;
+use crate::passes::loop_unroll::body_extractor::{ToOriginalLocation, FuncArgIdx};
 use super::{Env, LibraryAccess};
 
 /// This Env is used to process functions created by extracting loop bodies
@@ -20,6 +20,13 @@ pub struct ExtractedFuncEnvData<'a> {
     base: Box<Env<'a>>,
     caller: BucketId,
     remap: ToOriginalLocation,
+    arenas: HashSet<FuncArgIdx>,
+}
+
+macro_rules! update_inner {
+    ($self: expr, $inner: expr) => {{
+        ExtractedFuncEnvData::new($inner, &$self.caller, $self.remap, $self.arenas)
+    }};
 }
 
 impl Display for ExtractedFuncEnvData<'_> {
@@ -44,8 +51,13 @@ impl LibraryAccess for ExtractedFuncEnvData<'_> {
 //  AddressType::SubcmpSignal references created by ExtractedFunctionLocationUpdater
 //  back into the proper reference to access the correct Env entry.
 impl<'a> ExtractedFuncEnvData<'a> {
-    pub fn new(inner: Env<'a>, caller: &BucketId, remap: ToOriginalLocation) -> Self {
-        ExtractedFuncEnvData { base: Box::new(inner), caller: caller.clone(), remap }
+    pub fn new(
+        inner: Env<'a>,
+        caller: &BucketId,
+        remap: ToOriginalLocation,
+        arenas: HashSet<FuncArgIdx>,
+    ) -> Self {
+        ExtractedFuncEnvData { base: Box::new(inner), caller: caller.clone(), remap, arenas }
     }
 
     pub fn extracted_func_caller(&self) -> Option<&BucketId> {
@@ -68,7 +80,11 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
         let res = match self.remap.get(&subcmp_idx) {
-            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            None => {
+                //ASSERT: ArgIndex::SubCmp 'arena' parameters are not in 'remap' but all others are.
+                assert!(self.arenas.contains(&subcmp_idx));
+                unreachable!();
+            }
             Some((loc, idx)) => {
                 //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
                 //  the LocationRule that 'signal_idx' is computed from.
@@ -76,7 +92,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 match loc {
                     AddressType::Variable => self.base.get_var(*idx),
                     AddressType::Signal => self.base.get_signal(*idx),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                    AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
                         let subcmp = match **cmp_address {
                             Instruction::Value(ValueBucket {
                                 parse_as: ValueType::U32,
@@ -85,7 +101,13 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             }) => value,
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
-                        self.base.get_subcmp_signal(subcmp, *idx)
+                        if *counter_override {
+                            // ASSERT: always 0 from 'get_reverse_passing_refs_for_itr' in 'body_extractor.rs'
+                            assert_eq!(*idx, 0);
+                            self.base.get_subcmp_counter(subcmp)
+                        } else {
+                            self.base.get_subcmp_signal(subcmp, *idx)
+                        }
                     }
                 }
             }
@@ -95,12 +117,16 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
         match self.remap.get(&subcmp_idx) {
-            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            None => {
+                //ASSERT: ArgIndex::SubCmp 'arena' parameters are not in 'remap' but all others are.
+                assert!(self.arenas.contains(&subcmp_idx));
+                unreachable!();
+            }
             Some((loc, idx)) => {
                 match loc {
                     AddressType::Variable => self.base.get_subcmp_name(*idx),
                     AddressType::Signal => self.base.get_subcmp_name(*idx),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                    AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
                         let subcmp = match **cmp_address {
                             Instruction::Value(ValueBucket {
                                 parse_as: ValueType::U32,
@@ -109,10 +135,14 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             }) => value,
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
-                        //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
-                        //  the LocationRule that 'signal_idx' is computed from.
-                        assert_eq!(*idx, 0);
-                        self.base.get_subcmp_name(subcmp)
+                        if *counter_override {
+                            unreachable!();
+                        } else {
+                            //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                            //  the LocationRule that 'idx' is computed from.
+                            assert_eq!(*idx, 0);
+                            self.base.get_subcmp_name(subcmp)
+                        }
                     }
                 }
             }
@@ -121,12 +151,16 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
         match self.remap.get(&subcmp_idx) {
-            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            None => {
+                //ASSERT: ArgIndex::SubCmp 'arena' parameters are not in 'remap' but all others are.
+                assert!(self.arenas.contains(&subcmp_idx));
+                unreachable!();
+            }
             Some((loc, idx)) => {
                 match loc {
                     AddressType::Variable => self.base.get_subcmp_template_id(*idx),
                     AddressType::Signal => self.base.get_subcmp_template_id(*idx),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                    AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
                         let subcmp = match **cmp_address {
                             Instruction::Value(ValueBucket {
                                 parse_as: ValueType::U32,
@@ -135,27 +169,35 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             }) => value,
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
-                        //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
-                        //  the LocationRule that 'signal_idx' is computed from.
-                        assert_eq!(*idx, 0);
-                        self.base.get_subcmp_template_id(subcmp)
+                        if *counter_override {
+                            unreachable!();
+                        } else {
+                            //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                            //  the LocationRule that 'idx' is computed from.
+                            assert_eq!(*idx, 0);
+                            self.base.get_subcmp_template_id(subcmp)
+                        }
                     }
                 }
             }
         }
     }
 
+    pub fn get_subcmp_counter(&self, _subcmp_idx: usize) -> Value {
+        todo!()
+    }
+
     pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
         let res = match self.remap.get(&subcmp_idx).cloned() {
-            //TODO: Is this None case being hit by a pre-existing subcmp at index 0 reference? I think so. Can I verify?
-            //  All subcmp refs in extracted body should have been replaced with refs to a subfix parameter... right?
-            //OBS: It happens because there will be Unknown counter when certain loop bodies are extracted to a function.
-            //  That means I do need to add the code to decrement counters inside the loop and let StoreBucket generate
-            //  the counter checks that will determine when to execute the "run" function at runtime.
-            None => todo!(), //false, // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            None => {
+                //ASSERT: ArgIndex::SubCmp 'arena' parameters are not in 'remap' but all others are.
+                assert!(self.arenas.contains(&subcmp_idx));
+                // This will be reached for the StoreBucket that generates a call to the "_run" function.
+                return true; // True to execute the run_subcmp function
+            }
             Some((loc, _)) => {
                 match loc {
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                    AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
                         let subcmp = match *cmp_address {
                             Instruction::Value(ValueBucket {
                                 parse_as: ValueType::U32,
@@ -164,7 +206,11 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             }) => value,
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
-                        self.base.subcmp_counter_is_zero(subcmp)
+                        if counter_override {
+                            todo!()
+                        } else {
+                            self.base.subcmp_counter_is_zero(subcmp)
+                        }
                     }
                     _ => false, // no counter for Variable/Signal types
                 }
@@ -175,10 +221,14 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
         let res = match self.remap.get(&subcmp_idx).cloned() {
-            None => todo!(), //false, // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            None => {
+                //ASSERT: ArgIndex::SubCmp 'arena' parameters are not in 'remap' but all others are.
+                assert!(self.arenas.contains(&subcmp_idx));
+                unreachable!();
+            }
             Some((loc, _)) => {
                 match loc {
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                    AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
                         let subcmp = match *cmp_address {
                             Instruction::Value(ValueBucket {
                                 parse_as: ValueType::U32,
@@ -187,7 +237,11 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             }) => value,
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
-                        self.base.subcmp_counter_equal_to(subcmp, value)
+                        if counter_override {
+                            todo!()
+                        } else {
+                            self.base.subcmp_counter_equal_to(subcmp, value)
+                        }
                     }
                     _ => false, // no counter for Variable/Signal types
                 }
@@ -206,40 +260,44 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn set_var(self, idx: usize, value: Value) -> Self {
         // Local variables are referenced in the normal way
-        ExtractedFuncEnvData::new(self.base.set_var(idx, value), &self.caller, self.remap)
+        update_inner!(self, self.base.set_var(idx, value))
     }
 
     pub fn set_signal(self, idx: usize, value: Value) -> Self {
         // Signals are referenced in the normal way
-        ExtractedFuncEnvData::new(self.base.set_signal(idx, value), &self.caller, self.remap)
+        update_inner!(self, self.base.set_signal(idx, value))
     }
 
     pub fn set_all_to_unk(self) -> Self {
-        // Local variables are referenced in the normal way
-        ExtractedFuncEnvData::new(self.base.set_all_to_unk(), &self.caller, self.remap)
+        update_inner!(self, self.base.set_all_to_unk())
     }
 
-    pub fn set_subcmp_to_unk(self, _subcmp_idx: usize) -> Self {
-        unreachable!()
+    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
+        // The index here is already converted within BucketInterpreter::get_write_operations_in_store_bucket
+        //  via interpreting the LocationRule and performing the PassMemory lookup on the unchanged scope
+        //  (per comment in BucketInterpreter::run_function_loopbody).
+        update_inner!(self, self.base.set_subcmp_to_unk(subcmp_idx))
     }
 
-    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, new_value: Value) -> Self {
         //NOTE: This is only called by BucketInterpreter::store_value_in_address.
         //Use the map from loop unrolling to convert the SubcmpSignal reference back
         //  into the proper reference (reversing ExtractedFunctionLocationUpdater).
         let new_env = match self.remap.get(&subcmp_idx).cloned() {
-            //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
-            //  to the 'remap' (producing None result here) because those parameters are
-            //  not actually used to access signals, just to call _run and update counter.
-            None => *self.base,
+            None => {
+                //ASSERT: ArgIndex::SubCmp 'arena' parameters are not in 'remap' but all others are.
+                assert!(self.arenas.contains(&subcmp_idx));
+                // This will be reached for the StoreBucket that generates a call to the "_run" function.
+                return self; // Nothing needs to be done.
+            }
             Some((loc, idx)) => {
                 //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
                 //  the LocationRule that 'signal_idx' is computed from.
                 assert_eq!(signal_idx, 0);
                 match loc {
-                    AddressType::Variable => self.base.set_var(idx, value),
-                    AddressType::Signal => self.base.set_signal(idx, value),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                    AddressType::Variable => self.base.set_var(idx, new_value),
+                    AddressType::Signal => self.base.set_signal(idx, new_value),
+                    AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
                         let subcmp = match *cmp_address {
                             Instruction::Value(ValueBucket {
                                 parse_as: ValueType::U32,
@@ -248,48 +306,31 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             }) => value,
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
-                        self.base.set_subcmp_signal(subcmp, idx, value)
+                        if counter_override {
+                            // ASSERT: always 0 from 'get_reverse_passing_refs_for_itr' in 'body_extractor.rs'
+                            assert_eq!(idx, 0);
+                            // NOTE: If unwrapping to u32 directly causes a panic, then need to allow Value as the parameter.
+                            self.base.set_subcmp_counter(subcmp, new_value.get_u32())
+                        } else {
+                            self.base.set_subcmp_signal(subcmp, idx, new_value)
+                        }
                     }
                 }
             }
         };
-        ExtractedFuncEnvData::new(new_env, &self.caller, self.remap)
+        update_inner!(self, new_env)
     }
 
-    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
-        let new_env = match self.remap.get(&subcmp_idx).cloned() {
-            //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
-            //  to the 'remap' (producing None result here) because those parameters are
-            //  not actually used to access signals, just to call _run and update counter.
-            //  No counter update needed when SubcmpSignal is used for these special cases.
-            None => *self.base,
-            Some((loc, _)) => {
-                match loc {
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
-                        let subcmp = match *cmp_address {
-                            Instruction::Value(ValueBucket {
-                                parse_as: ValueType::U32,
-                                value,
-                                ..
-                            }) => value,
-                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
-                        };
-                        self.base.decrease_subcmp_counter(subcmp)
-                    }
-                    _ => *self.base, // no counter for Variable/Signal types
-                }
-            }
-        };
-        ExtractedFuncEnvData::new(new_env, &self.caller, self.remap)
+    pub fn set_subcmp_counter(self, _subcmp_idx: usize, _new_val: usize) -> Self {
+        todo!()
     }
 
-    pub fn run_subcmp(
-        self,
-        _subcmp_idx: usize,
-        _name: &String,
-        _interpreter: &BucketInterpreter,
-        _observe: bool,
-    ) -> Self {
+    pub fn decrease_subcmp_counter(self, _subcmp_idx: usize) -> Self {
+        //Do nothing because subcmp counter is managed explicitly in extracted functions
+        self
+    }
+
+    pub fn run_subcmp(self, _: usize, _: &String, _: &BucketInterpreter, _: bool) -> Self {
         //Return self just like the StandardEnvData
         self
     }
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index 647e3b201..537ac3c80 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -1,12 +1,12 @@
 use std::cell::Ref;
-use std::collections::{HashMap, BTreeMap};
+use std::collections::{HashMap, BTreeMap, HashSet};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::intermediate_representation::BucketId;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::loop_unroll::body_extractor::{LoopBodyExtractor, ToOriginalLocation};
+use crate::passes::loop_unroll::body_extractor::{LoopBodyExtractor, ToOriginalLocation, FuncArgIdx};
 use self::extracted_func_env::ExtractedFuncEnvData;
 use self::standard_env::StandardEnvData;
 use self::unrolled_block_env::UnrolledBlockEnvData;
@@ -49,6 +49,16 @@ impl SubcmpEnv {
         copy
     }
 
+    pub fn get_counter(&self) -> usize {
+        self.counter
+    }
+
+    pub fn set_counter(self, new_val: usize) -> SubcmpEnv {
+        let mut copy = self;
+        copy.counter = new_val;
+        copy
+    }
+
     pub fn counter_is_zero(&self) -> bool {
         self.counter == 0
     }
@@ -121,8 +131,9 @@ impl<'a> Env<'a> {
         inner: Env<'a>,
         caller: &BucketId,
         remap: ToOriginalLocation,
+        arenas: HashSet<FuncArgIdx>,
     ) -> Self {
-        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner, caller, remap))
+        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner, caller, remap, arenas))
     }
 
     pub fn peel_extracted_func(self) -> Self {
@@ -173,6 +184,14 @@ impl<'a> Env<'a> {
         }
     }
 
+    pub fn get_subcmp_counter(&self, subcmp_idx: usize) -> Value {
+        match self {
+            Env::Standard(d) => d.get_subcmp_counter(subcmp_idx),
+            Env::UnrolledBlock(d) => d.get_subcmp_counter(subcmp_idx),
+            Env::ExtractedFunction(d) => d.get_subcmp_counter(subcmp_idx),
+        }
+    }
+
     pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
         match self {
             Env::Standard(d) => d.subcmp_counter_is_zero(subcmp_idx),
@@ -251,6 +270,16 @@ impl<'a> Env<'a> {
         }
     }
 
+    pub fn set_subcmp_counter(self, subcmp_idx: usize, new_val: usize) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_subcmp_counter(subcmp_idx, new_val)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_subcmp_counter(subcmp_idx, new_val)),
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.set_subcmp_counter(subcmp_idx, new_val))
+            }
+        }
+    }
+
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
         match self {
             Env::Standard(d) => Env::Standard(d.decrease_subcmp_counter(subcmp_idx)),
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
index d47c6be05..38e43a532 100644
--- a/circuit_passes/src/bucket_interpreter/env/standard_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -71,6 +71,10 @@ impl<'a> StandardEnvData<'a> {
         self.subcmps[&subcmp_idx].template_id
     }
 
+    pub fn get_subcmp_counter(&self, subcmp_idx: usize) -> Value {
+        Value::KnownU32(self.subcmps.get(&subcmp_idx).unwrap().get_counter())
+    }
+
     pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
         self.subcmps.get(&subcmp_idx).unwrap().counter_is_zero()
     }
@@ -119,32 +123,34 @@ impl<'a> StandardEnvData<'a> {
 
     /// Sets all the signals of the subcmp to UNK
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
-        let mut copy = self;
-        let subcmp_env = copy
-            .subcmps
-            .remove(&subcmp_idx)
-            .expect(format!("Can't set a signal of subcomponent {}", subcmp_idx).as_str());
-        copy.subcmps.insert(subcmp_idx, subcmp_env.reset());
-        copy
+        self.update_subcmp(subcmp_idx, |subcmp_env| subcmp_env.reset())
     }
 
     pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
-        let mut copy = self;
-        let subcmp_env = copy
-            .subcmps
-            .remove(&subcmp_idx)
-            .expect(format!("Can't set a signal of subcomponent {}", subcmp_idx).as_str());
-        copy.subcmps.insert(subcmp_idx, subcmp_env.set_signal(signal_idx, value));
-        copy
+        self.update_subcmp(subcmp_idx, |subcmp_env| subcmp_env.set_signal(signal_idx, value))
+    }
+
+    /// Overwrites the counter of the given subcomponent with `new_val`.
+    pub fn set_subcmp_counter(self, subcmp_idx: usize, new_val: usize) -> Self {
+        self.update_subcmp(subcmp_idx, |subcmp_env| subcmp_env.set_counter(new_val))
     }
 
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        self.update_subcmp(subcmp_idx, |subcmp_env| subcmp_env.decrease_counter())
+    }
+
+    /// Replaces the subcomponent stored at `subcmp_idx` with the result of applying `f` to it.
+    ///
+    /// # Panics
+    /// Panics if no subcomponent is stored at `subcmp_idx`.
+    fn update_subcmp(self, subcmp_idx: usize, f: impl FnOnce(SubcmpEnv) -> SubcmpEnv) -> Self {
         let mut copy = self;
         let subcmp_env = copy
             .subcmps
             .remove(&subcmp_idx)
-            .expect(format!("Can't decrease counter of subcomponent {}", subcmp_idx).as_str());
-        copy.subcmps.insert(subcmp_idx, subcmp_env.decrease_counter());
+            // The panic message is only formatted when the lookup fails.
+            .unwrap_or_else(|| panic!("Can't find subcomponent {}", subcmp_idx));
+        copy.subcmps.insert(subcmp_idx, f(subcmp_env));
         copy
     }
 
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
index 7f52ca72d..4bb4f8ba0 100644
--- a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -75,6 +75,10 @@ impl<'a> UnrolledBlockEnvData<'a> {
         self.base.get_subcmp_template_id(subcmp_idx)
     }
 
+    pub fn get_subcmp_counter(&self, subcmp_idx: usize) -> Value {
+        self.base.get_subcmp_counter(subcmp_idx)
+    }
+
     pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
         self.base.subcmp_counter_is_zero(subcmp_idx)
     }
@@ -126,6 +130,13 @@ impl<'a> UnrolledBlockEnvData<'a> {
         }
     }
 
+    pub fn set_subcmp_counter(self, subcmp_idx: usize, new_val: usize) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.set_subcmp_counter(subcmp_idx, new_val)),
+            extractor: self.extractor,
+        }
+    }
+
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
         UnrolledBlockEnvData {
             base: Box::new(self.base.decrease_subcmp_counter(subcmp_idx)),
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index e3896950e..c3fa1fd1f 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -66,21 +66,19 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         subcmps: &mut Vec<usize>,
         env: &Env,
     ) {
+        let idx = self.get_index_from_location(&bucket.dest, env);
         match bucket.dest_address_type {
             AddressType::Variable => {
-                let idx = self.get_index_from_location(&bucket.dest, env);
                 for index in self.mem.get_variables_index_mapping(&self.scope, &idx) {
                     vars.push(index);
                 }
             }
             AddressType::Signal => {
-                let idx = self.get_index_from_location(&bucket.dest, env);
                 for index in self.mem.get_signal_index_mapping(&self.scope, &idx) {
                     signals.push(index);
                 }
             }
             AddressType::SubcmpSignal { .. } => {
-                let idx = self.get_index_from_location(&bucket.dest, env);
                 for index in self.mem.get_component_addr_index_mapping(&self.scope, &idx) {
                     subcmps.push(index);
                 }
@@ -388,6 +386,7 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         env: Env<'env>,
         observe: bool,
     ) -> R<'env> {
+        // Evaluate the source instruction first; a StoreBucket's src must always yield a value.
         let (src, env) = self.execute_instruction(&bucket.src, env, observe);
         let src = src.expect("src instruction in StoreBucket must produce a value!");
         let env =
@@ -427,18 +426,20 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         if cfg!(debug_assertions) {
             println!("Running function {}", name);
         };
-        let mut res: R<'env> = (
-            None,
-            Env::new_extracted_func_env(
-                env.clone(),
-                &bucket.id,
-                if name.starts_with(LOOP_BODY_FN_PREFIX) {
-                    self.global_data.borrow().get_data_for_func(name)[&env.get_vars_sort()].clone()
-                } else {
-                    Default::default()
-                },
-            ),
-        );
+        let mut res: R<'env> = (None, {
+            if name.starts_with(LOOP_BODY_FN_PREFIX) {
+                let gdat = self.global_data.borrow();
+                let fdat = &gdat.get_data_for_func(name)[&env.get_vars_sort()];
+                Env::new_extracted_func_env(env.clone(), &bucket.id, fdat.0.clone(), fdat.1.clone())
+            } else {
+                Env::new_extracted_func_env(
+                    env.clone(),
+                    &bucket.id,
+                    Default::default(),
+                    Default::default(),
+                )
+            }
+        });
         //NOTE: Do not change scope for the new interpreter because the mem lookups within
         //  `get_write_operations_in_store_bucket` need to use the original function context.
         let interp = self.mem.build_interpreter(self.global_data, self.observer);
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 6f3ea599f..9044450c6 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -5,7 +5,7 @@ use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{InstructionPointer, new_id, BucketId};
 use compiler::intermediate_representation::ir_interface::*;
-use indexmap::IndexMap;
+use indexmap::{IndexMap, IndexSet};
 use crate::bucket_interpreter::env::{Env, LibraryAccess};
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
@@ -22,6 +22,8 @@ pub struct ConditionalFlatteningPass<'d> {
     /// interpreter is currently analyzing code that is not in one of the generated loopbody functions)
     /// to a list of (ID, evaluated condition) pairs for the BranchBuckets in the current context.
     evaluated_conditions: RefCell<HashMap<Option<BucketId>, BranchValues>>,
+    /// Track the order that the branches appear in the traversal to stabilize output for lit tests.
+    branch_bucket_order: RefCell<IndexSet<BucketId>>,
     /// Maps CallBucket symbol (i.e. target function name) to BranchBucket value mapping to the
     /// new function that has brances simplified according to that mapping.
     /// NOTE: Uses IndexMap to preserve insertion order to stabilize lit test output.
@@ -37,6 +39,7 @@ impl<'d> ConditionalFlatteningPass<'d> {
             global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
             evaluated_conditions: Default::default(),
+            branch_bucket_order: Default::default(),
             new_functions: Default::default(),
             //The None key in this map is for the cases that are NOT inside the loopbody functions. When
             // traversal enters a loopbody function, this will change to the BranchValues of that CallBucket.
@@ -95,6 +98,7 @@ impl InterpreterObserver for ConditionalFlatteningPass<'_> {
     }
 
     fn on_branch_bucket(&self, bucket: &BranchBucket, env: &Env) -> bool {
+        // A fresh interpreter is built here to evaluate this bucket's branch condition.
         let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (_, cond_result, _) = interpreter.execute_conditional_bucket(
             &bucket.cond,
@@ -114,7 +118,17 @@ impl InterpreterObserver for ConditionalFlatteningPass<'_> {
             .borrow_mut()
             .entry(in_func)
             .or_default()
-            .insert(bucket.id, cond_result);
+            .entry(bucket.id)
+            // If an existing entry is not equal to the new computed value, use None for unknown
+            .and_modify(|e| {
+                if *e != cond_result {
+                    *e = None
+                }
+            })
+            // If there was no entry, insert the computed value
+            .or_insert(cond_result);
+        // Record first-visit order of this bucket; used later to build stable function names.
+        self.branch_bucket_order.borrow_mut().insert(bucket.id);
         true
     }
 
@@ -168,9 +182,10 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
 
     fn transform_call_bucket(&self, bucket: &CallBucket) -> InstructionPointer {
         let call_bucket_id = Some(bucket.id);
+        // The Some keys in the 'evaluated_conditions' map are for the cases that are inside
+        //  the loopbody functions when executed from the CallBucket.id used as the key.
         // NOTE: This borrow is inside brackets to prevent runtime double borrow error.
         let ec = { self.evaluated_conditions.borrow_mut().remove(&call_bucket_id) };
-        // The Some keys in this map are for the cases that are inside the loopbody functions.
         if let Some(ev) = ec {
             // If there are any conditions that evaluated to a known value, replace the
             //  CallBucket target function with a simplified version of that function.
@@ -178,11 +193,16 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
                 let mut nf = self.new_functions.borrow_mut();
                 // Check if the needed function exists, else create it.
                 let old_name = &bucket.symbol;
-                let new_name = ev.values().into_iter().fold(old_name.clone(), |acc, e| match e {
-                    Some(true) => format!("{}.T", acc),
-                    Some(false) => format!("{}.F", acc),
-                    None => format!("{}.N", acc),
-                });
+                // Build the new function name according to the values in 'ev' but sorted by 'branch_bucket_order'
+                let new_name =
+                    self.branch_bucket_order.borrow().iter().filter_map(|id| ev.get(id)).fold(
+                        old_name.clone(),
+                        |acc, e| match e {
+                            Some(true) => format!("{}.T", acc),
+                            Some(false) => format!("{}.F", acc),
+                            None => format!("{}.N", acc),
+                        },
+                    );
                 let new_target = nf
                     .entry(bucket.symbol.clone())
                     .or_default()
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index cc52854e3..d77527399 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -1,5 +1,5 @@
 use std::cell::{RefCell, Ref};
-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::vec;
 use indexmap::{IndexMap, IndexSet};
 use code_producers::llvm_elements::fr::*;
@@ -10,7 +10,7 @@ use compiler::intermediate_representation::{
 };
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::loop_unroll::LOOP_BODY_FN_PREFIX;
+use crate::passes::loop_unroll::{DEBUG_LOOP_UNROLL, LOOP_BODY_FN_PREFIX};
 use crate::passes::loop_unroll::extracted_location_updater::ExtractedFunctionLocationUpdater;
 use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
 use crate::passes::{builders, checks};
@@ -90,16 +90,42 @@ impl ExtraArgsResult {
             .collect()
     }
 
-    fn get_reverse_passing_refs_for_itr(&self, iter_num: usize) -> ToOriginalLocation {
-        self.bucket_to_itr_to_ref.iter().fold(ToOriginalLocation::new(), |mut acc, (k, v)| {
-            if let Some((addr_ty, addr_offset)) = v[iter_num].as_ref() {
-                acc.insert(
-                    self.bucket_to_args[k].get_signal_idx(),
-                    (addr_ty.clone(), *addr_offset),
-                );
-            }
-            acc
-        })
+    fn get_reverse_passing_refs_for_itr(
+        &self,
+        iter_num: usize,
+    ) -> (ToOriginalLocation, HashSet<FuncArgIdx>) {
+        self.bucket_to_itr_to_ref.iter().fold(
+            (ToOriginalLocation::new(), HashSet::new()),
+            |mut acc, (k, v)| {
+                if let Some((addr_ty, addr_offset)) = v[iter_num].as_ref() {
+                    let ai = self.bucket_to_args[k];
+                    acc.0.insert(ai.get_signal_idx(), (addr_ty.clone(), *addr_offset));
+                    // If applicable, insert the subcmp counter reference as well
+                    if let ArgIndex::SubCmp { counter, arena, .. } = ai {
+                        match addr_ty {
+                            AddressType::SubcmpSignal { counter_override, cmp_address, .. } => {
+                                assert_eq!(*counter_override, false); //there's no counter for a counter
+                                let counter_addr_ty = AddressType::SubcmpSignal {
+                                    cmp_address: cmp_address.clone(),
+                                    uniform_parallel_value: None,
+                                    is_output: false,
+                                    input_information: InputInformation::NoInput,
+                                    counter_override: true,
+                                };
+                                // NOTE: when there's a true subcomponent (indicated by the ArgIndex::SubCmp check above),
+                                //  the 'addr_offset' indicates which signal inside the subcomponent is accessed. That
+                                //  value is not relevant here because subcomponents have a single counter variable.
+                                acc.0.insert(counter, (counter_addr_ty, 0));
+                                // 'arena' args are tracked separately (not in the remap)
+                                acc.1.insert(arena);
+                            }
+                            _ => unreachable!(), // SubcmpSignal was created for all of these refs
+                        }
+                    }
+                }
+                acc
+            },
+        )
     }
 }
 
@@ -275,6 +301,21 @@ impl LoopBodyExtractor {
         func_name
     }
 
+    /// Create an Iterator containing the results of applying the given
+    /// function to only the `Some` entries in the given vector.
+    fn filter_map<'a, A, B, C>(
+        column: &'a Vec<Option<(A, B)>>,
+        f: impl FnMut(&(A, B)) -> C + 'a,
+    ) -> impl Iterator<Item = C> + '_ {
+        column.iter().filter_map(|x| x.as_ref()).map(f)
+    }
+
+    /// Return true if the given predicate holds for at least one
+    /// of the `Some` entries in the given vector (false if there are none).
+    fn filter_map_any<A, B>(column: &Vec<Option<(A, B)>>, f: impl FnMut(&(A, B)) -> bool) -> bool {
+        column.iter().filter_map(|x| x.as_ref()).any(f)
+    }
+
     /// The ideal scenario for extracting the loop body into a new function is to only
     /// need 2 function arguments, lvars and signals. However, we want to avoid variable
     /// indexing within the extracted function so we include extra pointer arguments
@@ -282,7 +323,7 @@ impl LoopBodyExtractor {
     /// unrolled and the indexing will become known constant values. This computes the
     /// extra arguments that will be needed.
     fn compute_extra_args<'a>(recorder: &'a EnvRecorder<'a, '_>) -> ExtraArgsResult {
-        // Table structure indexed first by load/store BucketId, then by iteration number.
+        // Table structure indexed first by load/store/call BucketId, then by iteration number.
         //  View the first (BucketId) as columns and the second (iteration number) as rows.
         //  The data reference is wrapped in Option to allow for some iterations that don't
         //  execute a specific bucket due to conditional branches within the loop body.
@@ -311,17 +352,18 @@ impl LoopBodyExtractor {
                 assert!(temp.is_none() || !temp.unwrap().1.is_unknown());
                 column.push(temp.map(|(a, v)| (a.clone(), v.get_u32())));
             }
+            if DEBUG_LOOP_UNROLL {
+                println!("bucket {} refs by iteration: {:?}", id, column);
+            }
             // ASSERT: same AddressType kind for this bucket in every (available) iteration
-            assert!(checks::all_same(
-                column.iter().filter_map(|x| x.as_ref()).map(|x| std::mem::discriminant(&x.0))
-            ));
+            assert!(Self::all_same(Self::filter_map(column, |(x, _)| std::mem::discriminant(x))));
 
-            // Check if the computed index value for this bucket is the same across all iterations (where it is
-            //  not None, see earlier comment). If it is not, then an extra function argument is needed for it.
-            //  Actually, check not only the computed index Value but the AddressType as well to capture when
-            //  it's a SubcmpSignal referencing a different subcomponent (the AddressType::cmp_address field
-            //  was also interpreted within the EnvRecorder so this comparison will be accurate).
-            if !checks::all_same(column.iter().filter_map(|x| x.as_ref())) {
+            // If the computed index value for this bucket is NOT the same across all available
+            //  iterations (i.e. where it is not None, see earlier comment) or if the AddressType
+            //  is SubcmpSignal, then an extra function argument is needed for it.
+            if Self::filter_map_any(column, |(x, _)| matches!(x, AddressType::SubcmpSignal { .. }))
+                || !Self::all_same(Self::filter_map(column, |(_, y)| *y))
+            {
                 bucket_to_args.insert(*id, ArgIndex::Signal(next_idx));
                 next_idx += 1;
             }
@@ -341,7 +383,7 @@ impl LoopBodyExtractor {
                 .map(|(k, col)| (k, &col[iter_num]))
                 .fold(BTreeMap::new(), |mut r, (b, a)| {
                     if let Some((at, _)) = a {
-                        if let AddressType::SubcmpSignal { .. } = at {
+                        if matches!(at, AddressType::SubcmpSignal { .. }) {
                             r.entry(SubcmpSignalHashFix::convert(&at)).or_default().insert(*b);
                         }
                     }
@@ -368,18 +410,14 @@ impl LoopBodyExtractor {
                         ArgIndex::SubCmp { signal: *sig, arena: arena_idx, counter: counter_idx },
                     );
                 } else {
-                    //TODO: What to do when the signal index w/in the subcomp was not variant?
-                    //  Should I just add a parameter anyway? It doesn't hurt to do that so
-                    //  I guess that's the approach to take for now.
-                    bucket_to_args.insert(
-                        *b,
-                        ArgIndex::SubCmp {
-                            signal: next_idx,
-                            arena: arena_idx,
-                            counter: counter_idx,
-                        },
-                    );
-                    next_idx += 1;
+                    //Since SubcmpSignal is always added above, this should be unreachable.
+                    unreachable!()
+                    // bucket_to_args.insert(
+                    //     *b,
+                    //     ArgIndex::SubCmp { signal: next_idx, arena: arena_idx, counter: counter_idx,
+                    //     },
+                    // );
+                    // next_idx += 1;
                 }
             }
         }
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
index 38c87bc93..fb4d7ad68 100644
--- a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -1,5 +1,5 @@
 use indexmap::IndexMap;
-use code_producers::llvm_elements::fr::FR_IDENTITY_ARR_PTR;
+use code_producers::llvm_elements::stdlib::LLVM_DONOTHING_FN_NAME;
 use compiler::intermediate_representation::{BucketId, InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::passes::builders::build_u32_value;
@@ -10,6 +10,13 @@ pub struct ExtractedFunctionLocationUpdater {
     pub insert_after: InstructionList,
 }
 
+/// Used within extracted loop body functions to rewrite all storage references
+/// (i.e. AddressType + LocationRule) so they instead reference the proper parameter
+/// of the extracted function. These replacements cannot use AddressType::Variable
+/// or AddressType::Signal because ExtractedFunctionLLVMIRProducer references the
+/// first two parameters of the extracted function via those. Therefore, it must
+/// use SubcmpSignal which will work seamlessly with existing subcmps because they
+/// will also just be passed as additional parameters to the function.
 impl ExtractedFunctionLocationUpdater {
     pub fn new() -> ExtractedFunctionLocationUpdater {
         ExtractedFunctionLocationUpdater { insert_after: Default::default() }
@@ -22,10 +29,6 @@ impl ExtractedFunctionLocationUpdater {
     ) {
         if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
             // Update the location information to reference the argument
-            //NOTE: This can't use AddressType::Variable or AddressType::Signal
-            //  because ExtractedFunctionLLVMIRProducer references the first two
-            //  parameters with those. So this has to use SubcmpSignal (it should
-            //  work fine because subcomps will also just be additional params).
             bucket.address_type = AddressType::SubcmpSignal {
                 cmp_address: build_u32_value(bucket, ai.get_signal_idx()),
                 uniform_parallel_value: None,
@@ -44,73 +47,181 @@ impl ExtractedFunctionLocationUpdater {
         }
     }
 
-    fn check_store_bucket(
+    fn handle_any_store(
         &mut self,
-        bucket: &mut StoreBucket,
-        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
-    ) {
-        // Check the source/RHS of the store in either case
-        self.check_instruction(&mut bucket.src, bucket_arg_order);
-        //
-        if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
-            // If needed, add a StoreBucket to 'insert_after' that will call the template_run function.
-            // NOTE: This must happen before the modification step so it can use existing values from the bucket.
-            if let ArgIndex::SubCmp { arena, .. } = ai {
-                self.insert_after.push(
-                    StoreBucket {
+        ai: &ArgIndex,
+        dest: &LocationRule,
+        bucket_meta: &dyn ObtainMeta,
+    ) -> (AddressType, LocationRule) {
+        // If the current argument involves an actual subcomponent, then generate additional code in the
+        // 'insert_after' list that will decrement the subcomponent counter and call the proper "_run"
+        //  function for the template when the counter reaches 0.
+        // NOTE: This must happen before the modification step so it can use existing values from the bucket.
+        if let ArgIndex::SubCmp { counter, arena, .. } = ai {
+            let counter_address = AddressType::SubcmpSignal {
+                cmp_address: new_u32_value(bucket_meta, *counter),
+                uniform_parallel_value: None,
+                counter_override: true,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            // Generate counter LoadBucket+ComputeBucket+StoreBucket in the "insert_after" list
+            //  (based on what StoreBucket::produce_llvm_ir would normally generate for this).
+            self.insert_after.push(
+                StoreBucket {
+                    id: new_id(),
+                    source_file_id: bucket_meta.get_source_file_id().clone(),
+                    line: bucket_meta.get_line(),
+                    message_id: bucket_meta.get_message_id(),
+                    context: InstrContext { size: 1 },
+                    dest_is_output: false,
+                    dest_address_type: counter_address.clone(),
+                    dest: LocationRule::Indexed {
+                        location: new_u32_value(bucket_meta, 0),
+                        template_header: None,
+                    },
+                    bounded_fn: None,
+                    src: ComputeBucket {
                         id: new_id(),
-                        source_file_id: bucket.source_file_id.clone(),
-                        line: bucket.line,
-                        message_id: bucket.message_id,
-                        context: bucket.context.clone(),
-                        dest_is_output: bucket.dest_is_output,
+                        source_file_id: bucket_meta.get_source_file_id().clone(),
+                        line: bucket_meta.get_line(),
+                        message_id: bucket_meta.get_message_id(),
+                        op: OperatorType::Sub,
+                        op_aux_no: 0,
+                        stack: vec![
+                            LoadBucket {
+                                id: new_id(),
+                                source_file_id: bucket_meta.get_source_file_id().clone(),
+                                line: bucket_meta.get_line(),
+                                message_id: bucket_meta.get_message_id(),
+                                address_type: counter_address.clone(),
+                                src: LocationRule::Indexed {
+                                    location: new_u32_value(bucket_meta, 0),
+                                    template_header: None,
+                                },
+                                bounded_fn: None,
+                            }
+                            .allocate(),
+                            ValueBucket {
+                                id: new_id(),
+                                source_file_id: bucket_meta.get_source_file_id().clone(),
+                                line: bucket_meta.get_line(),
+                                message_id: bucket_meta.get_message_id(),
+                                parse_as: ValueType::U32,
+                                op_aux_no: 0,
+                                value: 1,
+                            }
+                            .allocate(),
+                        ],
+                    }
+                    .allocate(),
+                }
+                .allocate(),
+            );
+
+            // Generate code to call the "run" function if the counter reaches 0
+            self.insert_after.push(
+                BranchBucket {
+                    id: new_id(),
+                    source_file_id: bucket_meta.get_source_file_id().clone(),
+                    line: bucket_meta.get_line(),
+                    message_id: bucket_meta.get_message_id(),
+                    cond: ComputeBucket {
+                        id: new_id(),
+                        source_file_id: bucket_meta.get_source_file_id().clone(),
+                        line: bucket_meta.get_line(),
+                        message_id: bucket_meta.get_message_id(),
+                        op: OperatorType::Eq(1),
+                        op_aux_no: 0,
+                        stack: vec![
+                            LoadBucket {
+                                id: new_id(),
+                                source_file_id: bucket_meta.get_source_file_id().clone(),
+                                line: bucket_meta.get_line(),
+                                message_id: bucket_meta.get_message_id(),
+                                address_type: counter_address,
+                                src: LocationRule::Indexed {
+                                    location: new_u32_value(bucket_meta, 0),
+                                    template_header: None,
+                                },
+                                bounded_fn: None,
+                            }
+                            .allocate(),
+                            ValueBucket {
+                                id: new_id(),
+                                source_file_id: bucket_meta.get_source_file_id().clone(),
+                                line: bucket_meta.get_line(),
+                                message_id: bucket_meta.get_message_id(),
+                                parse_as: ValueType::U32,
+                                op_aux_no: 0,
+                                value: 0,
+                            }
+                            .allocate(),
+                        ],
+                    }
+                    .allocate(),
+                    if_branch: vec![StoreBucket {
+                        id: new_id(),
+                        source_file_id: bucket_meta.get_source_file_id().clone(),
+                        line: bucket_meta.get_line(),
+                        message_id: bucket_meta.get_message_id(),
+                        context: InstrContext { size: 1 },
+                        dest_is_output: false,
                         dest_address_type: AddressType::SubcmpSignal {
-                            cmp_address: build_u32_value(bucket, arena),
+                            cmp_address: build_u32_value(bucket_meta, *arena),
                             uniform_parallel_value: None,
                             counter_override: false,
                             is_output: false,
-                            //TODO: Not sure what to put here. If I put Unknown (assuming the later pass
-                            //  would correct) it crashes somewhere. What I really need is Last in the
-                            //  proper place to make it generate the *_run function at the right time
-                            //  but NoLast in locations prior to that (I think). Why isn't Unknown handled
-                            //  by the later pass deterministic subcomp pass or something? Always using
-                            //  Last here could result in the run function being called too soon.
-                            //SEE: circom/tests/subcmps/subcmps0C.circom
                             input_information: InputInformation::Input {
-                                status: StatusInput::Unknown, // We don't know but we need to make the subsequent passes fix this
+                                status: StatusInput::Last, // This is the key to generating call to "run" function
                             },
                         },
                         dest: LocationRule::Indexed {
-                            location: build_u32_value(bucket, 0), //the value here is ignored by the 'bounded_fn' below
-                            template_header: match &bucket.dest {
+                            location: build_u32_value(bucket_meta, 0), //the value here is ignored by the 'bounded_fn' below
+                            template_header: match dest {
                                 LocationRule::Indexed { template_header, .. } => {
                                     template_header.clone()
                                 }
                                 LocationRule::Mapped { .. } => todo!(),
                             },
                         },
-                        src: build_u32_value(bucket, 0), //the value here is ignored at runtime
-                        bounded_fn: Some(String::from(FR_IDENTITY_ARR_PTR)), //NOTE: doesn't have enough arguments but it works out
+                        src: build_u32_value(bucket_meta, 0), //the value here is ignored at runtime
+                        bounded_fn: Some(String::from(LLVM_DONOTHING_FN_NAME)), // actual result ignored, only need effect of 'StatusInput::Last'
                     }
-                    .allocate(),
-                );
-                // NOTE: Not adding counter for now because it shouldn't be needed anyway and it's more work to add.
-                //  The best approach would probably be to generate Load+Compute+Store (based on what StoreBucket
-                //  would normally generate for it) in an "insert_before" list just like the "insert_after" list.
-            }
-
-            //Transform this bucket into the normal fixed-index signal reference
-            bucket.dest_address_type = AddressType::SubcmpSignal {
-                cmp_address: build_u32_value(bucket, ai.get_signal_idx()),
+                    .allocate()],
+                    else_branch: vec![],
+                }
+                .allocate(),
+            );
+        }
+        //Transform this bucket into the normal fixed-index signal reference
+        (
+            AddressType::SubcmpSignal {
+                cmp_address: build_u32_value(bucket_meta, ai.get_signal_idx()),
                 uniform_parallel_value: None,
                 counter_override: false,
                 is_output: false,
                 input_information: InputInformation::NoInput,
-            };
-            bucket.dest = LocationRule::Indexed {
-                location: build_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+            },
+            LocationRule::Indexed {
+                location: new_u32_value(bucket_meta, 0), //use index 0 to ref the entire storage array
                 template_header: None,
-            };
+            },
+        )
+    }
+
+    fn check_store_bucket(
+        &mut self,
+        bucket: &mut StoreBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        // Check the source/RHS of the store in either case
+        self.check_instruction(&mut bucket.src, bucket_arg_order);
+        //
+        if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
+            let (at, lr) = self.handle_any_store(&ai, &bucket.dest, bucket);
+            bucket.dest_address_type = at;
+            bucket.dest = lr;
         } else {
             // If not replacing, check deeper in the AddressType and LocationRule
             self.check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
@@ -118,6 +229,28 @@ impl ExtractedFunctionLocationUpdater {
         }
     }
 
+    fn check_call_bucket(
+        &mut self,
+        bucket: &mut CallBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        // Check the call parameters
+        self.check_instructions(&mut bucket.arguments, bucket_arg_order);
+        // A store can be implicit within a CallBucket 'return_info'
+        let bucket_meta = ObtainMetaImpl::from(bucket); //avoid borrow issues
+        if let ReturnType::Final(fd) = &mut bucket.return_info {
+            if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
+                let (at, lr) = self.handle_any_store(&ai, &fd.dest, &bucket_meta);
+                fd.dest_address_type = at;
+                fd.dest = lr;
+            } else {
+                // If not replacing, check deeper in the AddressType and LocationRule
+                self.check_address_type(&mut fd.dest_address_type, bucket_arg_order);
+                self.check_location_rule(&mut fd.dest, bucket_arg_order);
+            }
+        }
+    }
+
     fn check_location_rule(
         &mut self,
         location_rule: &mut LocationRule,
@@ -196,14 +329,6 @@ impl ExtractedFunctionLocationUpdater {
         self.check_instructions(&mut bucket.body, bucket_arg_order);
     }
 
-    fn check_call_bucket(
-        &mut self,
-        bucket: &mut CallBucket,
-        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
-    ) {
-        self.check_instructions(&mut bucket.arguments, bucket_arg_order);
-    }
-
     fn check_branch_bucket(
         &mut self,
         bucket: &mut BranchBucket,
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 9adfca74e..ef51cbb72 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -1,5 +1,5 @@
 use std::cell::{RefCell, Ref};
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fmt::{Debug, Formatter};
 use indexmap::IndexMap;
 use compiler::intermediate_representation::BucketId;
@@ -9,12 +9,13 @@ use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::GlobalPassData;
-use super::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
+use super::DEBUG_LOOP_UNROLL;
+use super::body_extractor::{UnrolledIterLvars, ToOriginalLocation, FuncArgIdx};
 
 /// Holds values of index variables at array loads/stores within a loop
 pub struct VariableValues<'a> {
     pub env_at_header: Env<'a>,
-    /// The key is the ID of the load/store bucket where the reference is located.
+    /// The key is the ID of the load/store/call bucket where the reference is located.
     /// NOTE: Uses IndexMap to preserve insertion order to stabilize lit test output.
     pub loadstore_to_index: IndexMap<BucketId, (AddressType, Value)>,
 }
@@ -106,7 +107,7 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
         &self,
         extract_func: String,
         iter_env: UnrolledIterLvars,
-        value: ToOriginalLocation,
+        value: (ToOriginalLocation, HashSet<FuncArgIdx>),
     ) {
         self.global_data
             .borrow_mut()
@@ -171,6 +172,12 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
     fn visit(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
         let loc_result = self.compute_index_from_rule(env, loc);
         if loc_result == Value::Unknown {
+            if DEBUG_LOOP_UNROLL {
+                println!(
+                    "loop body is not safe to move because index is unknown from rule {:?}",
+                    loc
+                );
+            }
             self.safe_to_move.replace(false);
         }
         //NOTE: must record even when Unknown to ensure that Unknown value is not confused with
@@ -189,6 +196,9 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
                 AddressType::SubcmpSignal {
                     cmp_address: {
                         if addr_result == Value::Unknown {
+                            if DEBUG_LOOP_UNROLL {
+                                println!("loop body is not safe to move because index is unknown from addr {:?}", cmp_address);
+                            }
                             self.safe_to_move.replace(false);
                             NopBucket { id: 0 }.allocate()
                         } else {
@@ -214,7 +224,7 @@ impl InterpreterObserver for EnvRecorder<'_, '_> {
             todo!(); //not sure if/how to handle that
         }
         self.visit(&bucket.id, &bucket.address_type, &bucket.src, env);
-        true
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
     }
 
     fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {
@@ -222,7 +232,14 @@ impl InterpreterObserver for EnvRecorder<'_, '_> {
             todo!(); //not sure if/how to handle that
         }
         self.visit(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
-        true
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_call_bucket(&self, bucket: &CallBucket, env: &Env) -> bool {
+        if let ReturnType::Final(fd) = &bucket.return_info {
+            self.visit(&bucket.id, &fd.dest_address_type, &fd.dest, env);
+        }
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
     }
 
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
@@ -261,10 +278,6 @@ impl InterpreterObserver for EnvRecorder<'_, '_> {
         self.is_safe_to_move() //continue observing unless something unsafe has been found
     }
 
-    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
     fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
         self.is_safe_to_move() //continue observing unless something unsafe has been found
     }
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index e159dc098..f9e70aac6 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -21,6 +21,8 @@ use self::body_extractor::LoopBodyExtractor;
 
 const EXTRACT_LOOP_BODY_TO_NEW_FUNC: bool = true;
 
+const DEBUG_LOOP_UNROLL: bool = false;
+
 pub const LOOP_BODY_FN_PREFIX: &str = const_format::concatcp!(GENERATED_FN_PREFIX, "loop.body.");
 
 pub struct LoopUnrollPass<'d> {
@@ -42,6 +44,21 @@ impl<'d> LoopUnrollPass<'d> {
     }
 
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
+        if DEBUG_LOOP_UNROLL {
+            println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
+            for (i, s) in bucket.body.iter().enumerate() {
+                println!(
+                    "[{}/{}]{}",
+                    i + 1,
+                    bucket.body.len(),
+                    compiler::intermediate_representation::ToSExp::to_sexp(&**s).to_pretty(100)
+                );
+            }
+            for (i, s) in bucket.body.iter().enumerate() {
+                println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
+            }
+            println!("LOOP ENTRY env {}", env); //TODO: TEMP
+        }
         // Compute loop iteration count. If unknown, return immediately.
         let recorder = EnvRecorder::new(self.global_data, &self.memory);
         {
@@ -62,6 +79,9 @@ impl<'d> LoopUnrollPass<'d> {
                 inner_env = new_env;
             }
         }
+        if DEBUG_LOOP_UNROLL {
+            println!("recorder = {:?}", recorder);
+        }
 
         let mut block_body = vec![];
         if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() && recorder.get_iter() > 0 {
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index bc4c39fda..4a77511df 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -14,7 +14,7 @@ use crate::passes::{
     simplification::SimplificationPass, unknown_index_sanitization::UnknownIndexSanitizationPass,
 };
 
-use self::loop_unroll::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
+use self::loop_unroll::body_extractor::{UnrolledIterLvars, ToOriginalLocation, FuncArgIdx};
 
 mod const_arg_deduplication;
 mod conditional_flattening;
@@ -429,7 +429,8 @@ pub struct GlobalPassData {
     /// (from Env::get_vars_sort) to location reference in the original function. Used
     /// by ExtractedFuncEnvData to access the original function's Env via the extracted
     /// function's parameter references.
-    extract_func_orig_loc: HashMap<String, BTreeMap<UnrolledIterLvars, ToOriginalLocation>>,
+    extract_func_orig_loc:
+        HashMap<String, BTreeMap<UnrolledIterLvars, (ToOriginalLocation, HashSet<FuncArgIdx>)>>,
 }
 
 impl GlobalPassData {
@@ -440,7 +441,7 @@ impl GlobalPassData {
     pub fn get_data_for_func(
         &self,
         name: &String,
-    ) -> &BTreeMap<UnrolledIterLvars, ToOriginalLocation> {
+    ) -> &BTreeMap<UnrolledIterLvars, (ToOriginalLocation, HashSet<FuncArgIdx>)> {
         match self.extract_func_orig_loc.get(name) {
             Some(x) => x,
             None => {
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 2d0c1cec7..d151dfb48 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -161,6 +161,14 @@ impl<'a> ExtractedFunctionCtx<'a> {
             format!("No signals argument for {:?}", self.current_function.get_name()).as_str(),
         )
     }
+
+    fn get_arg_ptr(&self, id: AnyValueEnum<'a>) -> PointerValue<'a> {
+        let num = id
+            .into_int_value()
+            .get_zero_extended_constant()
+            .expect("must reference a constant argument index");
+        *self.args.get(num as usize).expect("must reference a known argument index")
+    }
 }
 
 impl<'a> BodyCtx<'a> for ExtractedFunctionCtx<'a> {
@@ -192,20 +200,23 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
         _producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
     ) -> PointerValue<'a> {
-        let num = id
-            .into_int_value()
-            .get_zero_extended_constant()
-            .expect("must reference a constant argument index");
-        *self.args.get(num as usize).expect("must reference a known argument index")
+        self.get_arg_ptr(id)
     }
 
     fn load_subcmp_counter(
         &self,
-        _producer: &dyn LLVMIRProducer<'a>,
-        _id: AnyValueEnum<'a>,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+        implicit: bool,
     ) -> Option<PointerValue<'a>> {
-        // Use None to force StoreBucket::produce_llvm_ir to skip counter increment.
-        None
+        if implicit {
+            // Use None for the implicit case from StoreBucket::produce_llvm_ir so it will
+            //  skip the counter decrement when using this ExtractedFunctionCtx because the
+            //  counter decrement is generated explicitly inside the extracted functions.
+            None
+        } else {
+            Some(self.get_arg_ptr(id))
+        }
     }
 
     fn get_signal(
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index 73a70c746..e876cd7de 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -66,6 +66,7 @@ pub trait TemplateCtx<'a> {
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
+        implicit: bool,
     ) -> Option<PointerValue<'a>>;
 
     /// Returns a pointer to the signal associated to the index
diff --git a/code_producers/src/llvm_elements/stdlib.rs b/code_producers/src/llvm_elements/stdlib.rs
index 27546f463..b6fba2a9d 100644
--- a/code_producers/src/llvm_elements/stdlib.rs
+++ b/code_producers/src/llvm_elements/stdlib.rs
@@ -7,8 +7,10 @@ pub const GENERATED_FN_PREFIX: &str = "..generated..";
 pub const CONSTRAINT_VALUES_FN_NAME: &str = "__constraint_values";
 pub const CONSTRAINT_VALUE_FN_NAME: &str = "__constraint_value";
 pub const ASSERT_FN_NAME: &str = "__assert";
+pub const LLVM_DONOTHING_FN_NAME: &str = "llvm.donothing";
 
 mod stdlib {
+    use inkwell::intrinsics::Intrinsic;
     use inkwell::values::AnyValue;
 
     use crate::llvm_elements::functions::{create_bb, create_function};
@@ -16,11 +18,18 @@ mod stdlib {
         create_br, create_call, create_conditional_branch, create_eq, create_return_void,
         create_store,
     };
-    use crate::llvm_elements::stdlib::{
-        ASSERT_FN_NAME, CONSTRAINT_VALUE_FN_NAME, CONSTRAINT_VALUES_FN_NAME,
-    };
     use crate::llvm_elements::LLVMIRProducer;
     use crate::llvm_elements::types::{bigint_type, bool_type, void_type};
+    use super::{
+        ASSERT_FN_NAME, CONSTRAINT_VALUE_FN_NAME, CONSTRAINT_VALUES_FN_NAME, LLVM_DONOTHING_FN_NAME,
+    };
+
+    pub fn llvm_donothing_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+        Intrinsic::find(LLVM_DONOTHING_FN_NAME)
+            .unwrap()
+            .get_declaration(&producer.llvm().module, &[])
+            .unwrap();
+    }
 
     pub fn constraint_values_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
         let bigint_ty = bigint_type(producer);
@@ -100,6 +109,7 @@ mod stdlib {
 }
 
 pub fn load_stdlib<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    stdlib::llvm_donothing_fn(producer);
     stdlib::constraint_values_fn(producer);
     stdlib::constraint_value_fn(producer);
     stdlib::abort_declared_fn(producer);
diff --git a/code_producers/src/llvm_elements/template.rs b/code_producers/src/llvm_elements/template.rs
index d23880676..4cfd418e5 100644
--- a/code_producers/src/llvm_elements/template.rs
+++ b/code_producers/src/llvm_elements/template.rs
@@ -90,6 +90,7 @@ impl<'a> TemplateCtx<'a> for StdTemplateCtx<'a> {
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
+        _implicit: bool,
     ) -> Option<PointerValue<'a>> {
         Some(
             create_gep(
diff --git a/compiler/src/intermediate_representation/ir_interface.rs b/compiler/src/intermediate_representation/ir_interface.rs
index 96c6e3a50..84d6e920a 100644
--- a/compiler/src/intermediate_representation/ir_interface.rs
+++ b/compiler/src/intermediate_representation/ir_interface.rs
@@ -37,6 +37,36 @@ pub trait ObtainMeta {
     fn get_message_id(&self) -> usize;
 }
 
+/// Owned snapshot of the metadata exposed by an [`ObtainMeta`] implementor.
+pub struct ObtainMetaImpl {
+    source_file_id: Option<usize>,
+    line: usize,
+    message_id: usize,
+}
+
+impl ObtainMeta for ObtainMetaImpl {
+    fn get_source_file_id(&self) -> &Option<usize> {
+        &self.source_file_id
+    }
+    fn get_line(&self) -> usize {
+        self.line
+    }
+    fn get_message_id(&self) -> usize {
+        self.message_id
+    }
+}
+
+impl ObtainMetaImpl {
+    /// Copies the source file id, line and message id out of `bucket`.
+    pub fn from(bucket: &dyn ObtainMeta) -> ObtainMetaImpl {
+        Self {
+            source_file_id: *bucket.get_source_file_id(), // `Option<usize>` is `Copy`
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+        }
+    }
+}
+
 pub trait CheckCompute {
     fn has_compute_in(&self) -> bool;
 }
diff --git a/compiler/src/intermediate_representation/load_bucket.rs b/compiler/src/intermediate_representation/load_bucket.rs
index 564715379..abd714783 100644
--- a/compiler/src/intermediate_representation/load_bucket.rs
+++ b/compiler/src/intermediate_representation/load_bucket.rs
@@ -95,7 +95,7 @@ impl WriteLLVMIR for LoadBucket {
                             let addr = cmp_address.produce_llvm_ir(producer)
                                 .expect("The address of a subcomponent must yield a value!");
                             if *counter_override {
-                                return producer.template_ctx().load_subcmp_counter(producer, addr).expect("could not find counter!")
+                                return producer.template_ctx().load_subcmp_counter(producer, addr, false).expect("could not find counter!")
                             } else {
                                 let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                                 create_gep(producer, subcmp, &[zero(producer)])
@@ -113,7 +113,7 @@ impl WriteLLVMIR for LoadBucket {
                     AddressType::SubcmpSignal { cmp_address, counter_override, ..  } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
                         if *counter_override {
-                            producer.template_ctx().load_subcmp_counter(producer, addr).expect("could not find counter!")
+                            producer.template_ctx().load_subcmp_counter(producer, addr, false).expect("could not find counter!")
                         } else {
                             let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                             if subcmp.get_type().get_element_type().is_array_type() {
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 0787c9d38..fa0e8def6 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -8,6 +8,7 @@ use code_producers::llvm_elements::instructions::{
     create_call, create_gep, create_load_with_name, create_store, create_sub_with_name,
     pointer_cast,
 };
+use code_producers::llvm_elements::stdlib::LLVM_DONOTHING_FN_NAME;
 use code_producers::llvm_elements::values::{create_literal_u32, zero};
 use code_producers::wasm_elements::*;
 use crate::intermediate_representation::{BucketId, new_id, SExp, ToSExp, UpdateId};
@@ -86,7 +87,7 @@ impl UpdateId for StoreBucket {
     }
 }
 
-impl StoreBucket{
+impl StoreBucket {
     /// The caller must manage the debug location information before calling this function.
     pub fn produce_llvm_ir<'a, 'b>(
         producer: &'b dyn LLVMIRProducer<'a>,
@@ -96,7 +97,10 @@ impl StoreBucket{
         context: InstrContext,
         bounded_fn: &Option<String>,
     ) -> Option<LLVMInstruction<'a>> {
-        let dest_index = dest.produce_llvm_ir(producer).expect("We need to produce some kind of instruction!").into_int_value();
+        let dest_index = dest
+            .produce_llvm_ir(producer)
+            .expect("We need to produce some kind of instruction!")
+            .into_int_value();
 
         let mut source = match src {
             Either::Left(s) => s,
@@ -107,24 +111,38 @@ impl StoreBucket{
         let store = match &bounded_fn {
             Some(name) => {
                 assert_eq!(1, context.size, "unhandled array store");
-                let arr_ptr = match &dest_address_type {
-                    AddressType::Variable => producer.body_ctx().get_variable_array(producer),
-                    AddressType::Signal => producer.template_ctx().get_signal_array(producer),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
-                        let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                        let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer)])
+                if name == LLVM_DONOTHING_FN_NAME {
+                    //LLVM equivalent of a "nop" instruction
+                    create_call(producer, LLVM_DONOTHING_FN_NAME, &[])
+                } else {
+                    let arr_ptr = match &dest_address_type {
+                        AddressType::Variable => producer.body_ctx().get_variable_array(producer),
+                        AddressType::Signal => producer.template_ctx().get_signal_array(producer),
+                        AddressType::SubcmpSignal { cmp_address, .. } => {
+                            let addr = cmp_address
+                                .produce_llvm_ir(producer)
+                                .expect("The address of a subcomponent must yield a value!");
+                            let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
+                            create_gep(producer, subcmp, &[zero(producer)])
+                        }
                     }
-                }.into_pointer_value();
-                let arr_ptr = pointer_cast(producer, arr_ptr, array_ptr_ty(producer));
-                create_call(producer, name.as_str(), &[arr_ptr.into(), dest_index.into(), source.into_int_value().into()])
+                    .into_pointer_value();
+                    let arr_ptr = pointer_cast(producer, arr_ptr, array_ptr_ty(producer));
+                    create_call(
+                        producer,
+                        name.as_str(),
+                        &[arr_ptr.into(), dest_index.into(), source.into_int_value().into()],
+                    )
+                }
             }
             None => {
                 let dest_gep = match &dest_address_type {
                     AddressType::Variable => producer.body_ctx().get_variable(producer, dest_index),
                     AddressType::Signal => producer.template_ctx().get_signal(producer, dest_index),
                     AddressType::SubcmpSignal { cmp_address, .. } => {
-                        let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
+                        let addr = cmp_address
+                            .produce_llvm_ir(producer)
+                            .expect("The address of a subcomponent must yield a value!");
                         let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                         if subcmp.get_type().get_element_type().is_array_type() {
                             create_gep(producer, subcmp, &[zero(producer), dest_index])
@@ -133,7 +151,8 @@ impl StoreBucket{
                             create_gep(producer, subcmp, &[dest_index])
                         }
                     }
-                }.into_pointer_value();
+                }
+                .into_pointer_value();
                 if context.size > 1 {
                     // In the non-scalar case, produce an array copy. If the stored source
                     //  is a LoadBucket, first convert it into an address.
@@ -152,9 +171,9 @@ impl StoreBucket{
                                     producer.template_ctx().get_signal(producer, src_index)
                                 }
                                 AddressType::SubcmpSignal { cmp_address, .. } => {
-                                    let addr = cmp_address
-                                        .produce_llvm_ir(producer)
-                                        .expect("The address of a subcomponent must yield a value!");
+                                    let addr = cmp_address.produce_llvm_ir(producer).expect(
+                                        "The address of a subcomponent must yield a value!",
+                                    );
                                     let subcmp =
                                         producer.template_ctx().load_subcmp_addr(producer, addr);
                                     create_gep(producer, subcmp, &[zero(producer), src_index])
@@ -165,7 +184,11 @@ impl StoreBucket{
                     create_call(
                         producer,
                         FR_ARRAY_COPY_FN_NAME,
-                        &[source.into_pointer_value().into(), dest_gep.into(), create_literal_u32(producer, context.size as u64).into()],
+                        &[
+                            source.into_pointer_value().into(),
+                            dest_gep.into(),
+                            create_literal_u32(producer, context.size as u64).into(),
+                        ],
                     )
                 } else {
                     // In the scalar case, just produce a store from the source value that was given
@@ -177,7 +200,7 @@ impl StoreBucket{
         // If we have a subcomponent storage decrement the counter by the size of the store (i.e., context.size)
         if let AddressType::SubcmpSignal { cmp_address, .. } = &dest_address_type {
             let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-            let counter = producer.template_ctx().load_subcmp_counter(producer, addr);
+            let counter = producer.template_ctx().load_subcmp_counter(producer, addr, true);
             if let Some(counter) = counter {
                 let value = create_load_with_name(producer, counter, "load.subcmp.counter");
                 let new_value = create_sub_with_name(producer, value.into_int_value(), create_literal_u32(producer, context.size as u64), "decrement.counter");
@@ -187,17 +210,22 @@ impl StoreBucket{
 
         // If the input information is unknown add a check that checks the counter and if its zero call the subcomponent
         // If its last just call run directly
-        if let AddressType::SubcmpSignal { input_information, cmp_address, .. } = &dest_address_type {
+        if let AddressType::SubcmpSignal { input_information, cmp_address, .. } = &dest_address_type
+        {
             if let InputInformation::Input { status } = input_information {
                 let sub_cmp_name = match &dest {
                     LocationRule::Indexed { template_header, .. } => template_header.clone(),
-                    LocationRule::Mapped { .. } => None
+                    LocationRule::Mapped { .. } => None,
                 };
                 match status {
                     StatusInput::Last => {
-                        let run_fn = run_fn_name(sub_cmp_name.expect("Could not get the name of the subcomponent"));
+                        let run_fn = run_fn_name(
+                            sub_cmp_name.expect("Could not get the name of the subcomponent"),
+                        );
                         // If we reach this point gep is the address of the subcomponent so we can just reuse it
-                        let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
+                        let addr = cmp_address
+                            .produce_llvm_ir(producer)
+                            .expect("The address of a subcomponent must yield a value!");
                         let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                         create_call(producer, run_fn.as_str(), &[subcmp.into()]);
                     }
@@ -211,7 +239,7 @@ impl StoreBucket{
                         // // Here we need to get the counter and check if its 0
                         // // If its is then call the run function because it means that all signals have been assigned
                         // let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                        // let counter = producer.template_ctx().load_subcmp_counter(producer, addr);
+                        // let counter = producer.template_ctx().load_subcmp_counter(producer, addr, false);
                         // let value = create_load_with_name(producer, counter, "load.subcmp.counter");
                         // let is_zero = create_eq_with_name(producer, zero(producer), value.into_int_value(), "subcmp.counter.isZero");
                         // create_conditional_branch(producer, is_zero.into_int_value(), run_bb, continue_bb);

From 8449d2a28b4c250ccddbae24a1e00bc1aebcb5a0 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 5 Oct 2023 10:32:51 -0500
Subject: [PATCH 06/22] impl Debug for Env to be more friendly, expand on test
 case comment

---
 circom/tests/loops/inner_conditional_10.circom   |  2 +-
 circuit_passes/src/bucket_interpreter/env/mod.rs | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/circom/tests/loops/inner_conditional_10.circom b/circom/tests/loops/inner_conditional_10.circom
index 27cf80562..d22053ee9 100644
--- a/circom/tests/loops/inner_conditional_10.circom
+++ b/circom/tests/loops/inner_conditional_10.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.* // TODO: branch conditions are not be flattened fully for some reason
+// XFAIL:.* // TODO: in addition to not extracting to a new function, branch conditions are not flattened for some reason which causes a panic in StoreBucket
 
 template Sigma() {
     signal input inp;
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index 537ac3c80..c9908c3fa 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -92,6 +92,16 @@ impl Display for Env<'_> {
     }
 }
 
+impl std::fmt::Debug for Env<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        match self {
+            Self::Standard(inner) => inner.fmt(f),
+            Self::UnrolledBlock(inner) => inner.fmt(f),
+            Self::ExtractedFunction(inner) => inner.fmt(f),
+        }
+    }
+}
+
 impl LibraryAccess for Env<'_> {
     fn get_function(&self, name: &String) -> Ref<FunctionCode> {
         match self {

From 8b1c6abc27c573be936ab8173d126097723a488b Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 5 Oct 2023 11:51:18 -0500
Subject: [PATCH 07/22] remove debug println

---
 circuit_passes/src/passes/conditional_flattening.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 9044450c6..96db4f199 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -98,7 +98,6 @@ impl InterpreterObserver for ConditionalFlatteningPass<'_> {
     }
 
     fn on_branch_bucket(&self, bucket: &BranchBucket, env: &Env) -> bool {
-        println!("conditional_flattening::on_branch_bucket = {:?}", bucket.id);
         let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (_, cond_result, _) = interpreter.execute_conditional_bucket(
             &bucket.cond,

From 3ae300bc15875d9e7dc116bb0f7582f344f08203 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 9 Oct 2023 13:28:44 -0500
Subject: [PATCH 08/22] fix: conditions not flattening in non-generated methods

---
 .../tests/loops/inner_conditional_10.circom   |  1 -
 .../src/passes/conditional_flattening.rs      | 37 ++++++++++++-------
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/circom/tests/loops/inner_conditional_10.circom b/circom/tests/loops/inner_conditional_10.circom
index d22053ee9..ddc93e38e 100644
--- a/circom/tests/loops/inner_conditional_10.circom
+++ b/circom/tests/loops/inner_conditional_10.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.* // TODO: in addition to not extracting to a new function, branch conditions are not flattened for some reason which causes a panic in StoreBucket
 
 template Sigma() {
     signal input inp;
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 96db4f199..e90ad54ec 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -46,6 +46,17 @@ impl<'d> ConditionalFlatteningPass<'d> {
             caller_context: RefCell::new(None),
         }
     }
+
+    /// Looks up the recorded outcome of the branch `bucket_id`: the active `caller_context`
+    /// is consulted first, else the `None`-keyed entry of `evaluated_conditions`.
+    fn get_known_condition(&self, bucket_id: &BucketId) -> Option<bool> {
+        let evaluated = self.evaluated_conditions.borrow();
+        let context = self.caller_context.borrow();
+        match context.as_ref().or_else(|| evaluated.get(&None))?.get(bucket_id) {
+            Some(Some(side)) => Some(*side),
+            _ => None,
+        }
+    }
 }
 
 impl InterpreterObserver for ConditionalFlatteningPass<'_> {
@@ -248,20 +259,18 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
     }
 
     fn transform_branch_bucket(&self, bucket: &BranchBucket) -> InstructionPointer {
-        if let Some(bv) = self.caller_context.borrow().as_ref() {
-            if let Some(Some(side)) = bv.get(&bucket.id) {
-                let code = if *side { &bucket.if_branch } else { &bucket.else_branch };
-                let block = BlockBucket {
-                    id: new_id(),
-                    source_file_id: bucket.source_file_id,
-                    line: bucket.line,
-                    message_id: bucket.message_id,
-                    body: code.clone(),
-                    n_iters: 1,
-                    label: format!("fold_{}", side),
-                };
-                return self.transform_block_bucket(&block);
-            }
+        if let Some(side) = self.get_known_condition(&bucket.id) {
+            let code = if side { &bucket.if_branch } else { &bucket.else_branch };
+            let block = BlockBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                body: code.clone(),
+                n_iters: 1,
+                label: format!("fold_{}", side),
+            };
+            return self.transform_block_bucket(&block);
         }
         // Default case: no change
         BranchBucket {

From 7ab111505c76b7eab9953abefeecd379e0c3d19a Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 18 Oct 2023 14:44:24 -0500
Subject: [PATCH 09/22] Fix the build and tests (due to rebasing)

---
 circom/tests/loops/inner_conditional_11.circom         |  1 +
 circom/tests/subcmps/mapped.circom                     |  2 --
 circom/tests/subcmps/mapped2.circom                    |  1 -
 circom/tests/subcmps/subcmps2.circom                   |  1 -
 circom/tests/subcmps/subcmps3.circom                   |  1 +
 .../src/passes/loop_unroll/body_extractor.rs           |  4 ++--
 .../passes/loop_unroll/extracted_location_updater.rs   | 10 +++++-----
 code_producers/src/llvm_elements/functions.rs          |  2 +-
 8 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/circom/tests/loops/inner_conditional_11.circom b/circom/tests/loops/inner_conditional_11.circom
index a18a080ee..12b303ad2 100644
--- a/circom/tests/loops/inner_conditional_11.circom
+++ b/circom/tests/loops/inner_conditional_11.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template Sigma() {
     signal input inp;
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index b089cb571..7726d39ed 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -2,8 +2,6 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
-// TODO: I think it has problems related to both https://veridise.atlassian.net/browse/VAN-582 and https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index 2e542d7a1..dba58ffe6 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -2,7 +2,6 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/subcmps2.circom b/circom/tests/subcmps/subcmps2.circom
index e7a6eaa04..faf7d0dbf 100644
--- a/circom/tests/subcmps/subcmps2.circom
+++ b/circom/tests/subcmps/subcmps2.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.6;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template Sum(n) {
     signal input inp[n];
diff --git a/circom/tests/subcmps/subcmps3.circom b/circom/tests/subcmps/subcmps3.circom
index 63eac590b..ec49bb91a 100644
--- a/circom/tests/subcmps/subcmps3.circom
+++ b/circom/tests/subcmps/subcmps3.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template Sum(n) {
     signal input inp[n];
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index d77527399..86b08407f 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -356,13 +356,13 @@ impl LoopBodyExtractor {
                 println!("bucket {} refs by iteration: {:?}", id, column);
             }
             // ASSERT: same AddressType kind for this bucket in every (available) iteration
-            assert!(Self::all_same(Self::filter_map(column, |(x, _)| std::mem::discriminant(x))));
+            assert!(checks::all_same(Self::filter_map(column, |(x, _)| std::mem::discriminant(x))));
 
             // If the computed index value for this bucket is NOT the same across all available
             //  iterations (i.e. where it is not None, see earlier comment) or if the AddressType
             //  is SubcmpSignal, then an extra function argument is needed for it.
             if Self::filter_map_any(column, |(x, _)| matches!(x, AddressType::SubcmpSignal { .. }))
-                || !Self::all_same(Self::filter_map(column, |(_, y)| *y))
+                || !checks::all_same(Self::filter_map(column, |(_, y)| *y))
             {
                 bucket_to_args.insert(*id, ArgIndex::Signal(next_idx));
                 next_idx += 1;
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
index fb4d7ad68..17bb42d12 100644
--- a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -59,7 +59,7 @@ impl ExtractedFunctionLocationUpdater {
         // NOTE: This must happen before the modification step so it can use existing values from the bucket.
         if let ArgIndex::SubCmp { counter, arena, .. } = ai {
             let counter_address = AddressType::SubcmpSignal {
-                cmp_address: new_u32_value(bucket_meta, *counter),
+                cmp_address: build_u32_value(bucket_meta, *counter),
                 uniform_parallel_value: None,
                 counter_override: true,
                 is_output: false,
@@ -77,7 +77,7 @@ impl ExtractedFunctionLocationUpdater {
                     dest_is_output: false,
                     dest_address_type: counter_address.clone(),
                     dest: LocationRule::Indexed {
-                        location: new_u32_value(bucket_meta, 0),
+                        location: build_u32_value(bucket_meta, 0),
                         template_header: None,
                     },
                     bounded_fn: None,
@@ -96,7 +96,7 @@ impl ExtractedFunctionLocationUpdater {
                                 message_id: bucket_meta.get_message_id(),
                                 address_type: counter_address.clone(),
                                 src: LocationRule::Indexed {
-                                    location: new_u32_value(bucket_meta, 0),
+                                    location: build_u32_value(bucket_meta, 0),
                                     template_header: None,
                                 },
                                 bounded_fn: None,
@@ -141,7 +141,7 @@ impl ExtractedFunctionLocationUpdater {
                                 message_id: bucket_meta.get_message_id(),
                                 address_type: counter_address,
                                 src: LocationRule::Indexed {
-                                    location: new_u32_value(bucket_meta, 0),
+                                    location: build_u32_value(bucket_meta, 0),
                                     template_header: None,
                                 },
                                 bounded_fn: None,
@@ -204,7 +204,7 @@ impl ExtractedFunctionLocationUpdater {
                 input_information: InputInformation::NoInput,
             },
             LocationRule::Indexed {
-                location: new_u32_value(bucket_meta, 0), //use index 0 to ref the entire storage array
+                location: build_u32_value(bucket_meta, 0), //use index 0 to ref the entire storage array
                 template_header: None,
             },
         )
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index d151dfb48..83ab3b1b2 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -205,7 +205,7 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
 
     fn load_subcmp_counter(
         &self,
-        producer: &dyn LLVMIRProducer<'a>,
+        _producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
         implicit: bool,
     ) -> Option<PointerValue<'a>> {

From 329308dc76c6d7907e30b58ea1295fb4f999361d Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 4 Oct 2023 14:00:09 -0500
Subject: [PATCH 10/22] [VAN-582] properly convert mapped to indexed when there
 are multiple versions of the subcomponent

---
 circom/tests/subcmps/conv_map2idx_A.circom    |  2 +-
 .../src/passes/conditional_flattening.rs      |  4 +-
 circuit_passes/src/passes/loop_unroll/mod.rs  |  2 +-
 .../src/passes/mapped_to_indexed.rs           | 69 ++++++++++---------
 circuit_passes/src/passes/mod.rs              | 24 ++++---
 circuit_passes/src/passes/simplification.rs   |  2 +-
 .../src/passes/unknown_index_sanitization.rs  |  4 +-
 .../location_rule.rs                          |  2 +-
 .../store_bucket.rs                           | 13 ++--
 9 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/circom/tests/subcmps/conv_map2idx_A.circom b/circom/tests/subcmps/conv_map2idx_A.circom
index 4ae5fc574..a3c2e43d0 100644
--- a/circom/tests/subcmps/conv_map2idx_A.circom
+++ b/circom/tests/subcmps/conv_map2idx_A.circom
@@ -108,7 +108,7 @@ component main = ComputeValue();
 //CHECK-NEXT:   store i32 %decrement.counter, i32* %6, align 4
 //CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
 //CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
-//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %8)
+//CHECK-NEXT:   call void @GetWeight_0_run([0 x i256]* %8)
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index e90ad54ec..6a804b1be 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -238,7 +238,7 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
                     argument_types: bucket.argument_types.clone(),
                     arguments: self.transform_instructions(&bucket.arguments),
                     arena_size: bucket.arena_size,
-                    return_info: self.transform_return_type(&bucket.return_info),
+                    return_info: self.transform_return_type(&bucket.id, &bucket.return_info),
                 }
                 .allocate();
             }
@@ -253,7 +253,7 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
             argument_types: bucket.argument_types.clone(),
             arguments: self.transform_instructions(&bucket.arguments),
             arena_size: bucket.arena_size,
-            return_info: self.transform_return_type(&bucket.return_info),
+            return_info: self.transform_return_type(&bucket.id, &bucket.return_info),
         }
         .allocate()
     }
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index f9e70aac6..dac33497a 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -101,7 +101,7 @@ impl<'d> LoopUnrollPass<'d> {
                 }
             }
         } else {
-            //If the loop body is not safe to move into a new function, just unroll.
+            //If the loop body is not safe to move into a new function, just unroll in-place.
             for _ in 0..recorder.get_iter() {
                 for s in &bucket.body {
                     let mut copy = s.clone();
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index cdc9a2240..0cec02eb8 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -2,7 +2,7 @@ use std::cell::RefCell;
 use std::collections::BTreeMap;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
-use compiler::intermediate_representation::ir_interface::*;
+use compiler::intermediate_representation::{ir_interface::*, BucketId};
 use compiler::intermediate_representation::{InstructionPointer, UpdateId};
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
@@ -13,9 +13,13 @@ use super::{CircuitTransformationPass, GlobalPassData};
 
 pub struct MappedToIndexedPass<'d> {
     global_data: &'d RefCell<GlobalPassData>,
-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
-    replacements: RefCell<BTreeMap<LocationRule, LocationRule>>,
+    /// Key is the BucketId of the bucket that holds the original LocationRule instance that needs to be
+    /// replaced and the mapped value is the new Indexed LocationRule. The BucketId must be used as key
+    /// instead of using the old LocationRule itself because the same Mapped LocationRule paired with a
+    /// different AddressType can result in a different Indexed LocationRule.
+    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    replacements: RefCell<BTreeMap<BucketId, LocationRule>>,
 }
 
 impl<'d> MappedToIndexedPass<'d> {
@@ -43,55 +47,49 @@ impl<'d> MappedToIndexedPass<'d> {
             .expect("cmp_address instruction in SubcmpSignal must produce a value!")
             .get_u32();
 
-        let mut acc_env = acc_env;
         let name = acc_env.get_subcmp_name(resolved_addr).clone();
         let io_def =
             self.memory.get_iodef(&acc_env.get_subcmp_template_id(resolved_addr), &signal_code);
-        let map_access = io_def.offset;
-        if indexes.len() > 0 {
+        let offset = if indexes.len() > 0 {
+            let mut acc_env = acc_env;
             let mut indexes_values = vec![];
             for i in indexes {
                 let (val, new_env) = interpreter.execute_instruction(i, acc_env, false);
                 indexes_values.push(val.expect("Mapped location must produce a value!").get_u32());
                 acc_env = new_env;
             }
-            let offset = compute_offset(&indexes_values, &io_def.lengths);
-            LocationRule::Indexed {
-                location: KnownU32(map_access + offset).to_value_bucket(&self.memory).allocate(),
-                template_header: Some(name),
-            }
+            io_def.offset + compute_offset(&indexes_values, &io_def.lengths)
         } else {
-            LocationRule::Indexed {
-                location: KnownU32(map_access).to_value_bucket(&self.memory).allocate(),
-                template_header: Some(name),
-            }
+            io_def.offset
+        };
+        LocationRule::Indexed {
+            location: KnownU32(offset).to_value_bucket(&self.memory).allocate(),
+            template_header: Some(name),
         }
     }
 
-    fn maybe_transform_location_rule(
+    fn maybe_transform_location(
         &self,
+        bucket_id: &BucketId,
         address: &AddressType,
         location: &LocationRule,
         env: &Env,
-    ) -> bool {
-        match address {
-            AddressType::Variable | AddressType::Signal => match location {
-                LocationRule::Indexed { .. } => true,
-                LocationRule::Mapped { .. } => unreachable!(),
-            },
-            AddressType::SubcmpSignal { cmp_address, .. } => match location {
-                LocationRule::Indexed { .. } => true,
-                LocationRule::Mapped { indexes, signal_code } => {
+    ) {
+        match location {
+            LocationRule::Mapped { indexes, signal_code } => match address {
+                AddressType::Variable | AddressType::Signal => unreachable!(), // cannot use mapped
+                AddressType::SubcmpSignal { cmp_address, .. } => {
                     let indexed_rule = self.transform_mapped_loc_to_indexed_loc(
                         cmp_address,
                         indexes,
                         *signal_code,
                         env,
                     );
-                    self.replacements.borrow_mut().insert(location.clone(), indexed_rule);
-                    true
+                    let old = self.replacements.borrow_mut().insert(*bucket_id, indexed_rule);
+                    assert!(old.is_none()); // ensure nothing is unexpectedly overwritten
                 }
             },
+            LocationRule::Indexed { .. } => return, // do nothing for indexed
         }
     }
 }
@@ -102,11 +100,13 @@ impl InterpreterObserver for MappedToIndexedPass<'_> {
     }
 
     fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
-        self.maybe_transform_location_rule(&bucket.address_type, &bucket.src, env)
+        self.maybe_transform_location(&bucket.id, &bucket.address_type, &bucket.src, env);
+        true
     }
 
     fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {
-        self.maybe_transform_location_rule(&bucket.dest_address_type, &bucket.dest, env)
+        self.maybe_transform_location(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
+        true
     }
 
     fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
@@ -183,9 +183,12 @@ impl CircuitTransformationPass for MappedToIndexedPass<'_> {
         iangneal: Let the interpreter run to see if we can find any replacements.
         If so, yield the replacement. Else, just give the default transformation
     */
-    fn transform_location_rule(&self, location_rule: &LocationRule) -> LocationRule {
-        // If the interpreter found a viable transformation, do that.
-        if let Some(indexed_rule) = self.replacements.borrow().get(&location_rule) {
+    fn transform_location_rule(
+        &self,
+        bucket_id: &BucketId,
+        location_rule: &LocationRule,
+    ) -> LocationRule {
+        if let Some(indexed_rule) = self.replacements.borrow().get(bucket_id) {
             let mut clone = indexed_rule.clone();
             clone.update_id(); //generate a new unique ID for the clone to avoid assertion in checks.rs
             return clone;
@@ -195,7 +198,7 @@ impl CircuitTransformationPass for MappedToIndexedPass<'_> {
                 location: self.transform_instruction(location),
                 template_header: template_header.clone(),
             },
-            LocationRule::Mapped { .. } => unreachable!(),
+            LocationRule::Mapped { .. } => unreachable!(), // all Mapped locations were replaced above
         }
     }
 
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 4a77511df..f4440e29f 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -4,7 +4,9 @@ use std::ops::Range;
 use compiler::circuit_design::function::{FunctionCode, FunctionCodeInfo};
 use compiler::circuit_design::template::{TemplateCode, TemplateCodeInfo};
 use compiler::compiler_interface::Circuit;
-use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer, new_id};
+use compiler::intermediate_representation::{
+    Instruction, InstructionList, InstructionPointer, new_id, BucketId,
+};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::passes::{
     checks::assert_unique_ids_in_circuit, conditional_flattening::ConditionalFlatteningPass,
@@ -183,7 +185,11 @@ pub trait CircuitTransformationPass {
         }
     }
 
-    fn transform_location_rule(&self, location_rule: &LocationRule) -> LocationRule {
+    fn transform_location_rule(
+        &self,
+        _bucket_id: &BucketId,
+        location_rule: &LocationRule,
+    ) -> LocationRule {
         match location_rule {
             LocationRule::Indexed { location, template_header } => LocationRule::Indexed {
                 location: self.transform_instruction(location),
@@ -203,7 +209,7 @@ pub trait CircuitTransformationPass {
             line: bucket.line,
             message_id: bucket.message_id,
             address_type: self.transform_address_type(&bucket.address_type),
-            src: self.transform_location_rule(&bucket.src),
+            src: self.transform_location_rule(&bucket.id, &bucket.src),
             bounded_fn: bucket.bounded_fn.clone(),
         }
         .allocate()
@@ -218,7 +224,7 @@ pub trait CircuitTransformationPass {
             context: bucket.context.clone(),
             dest_is_output: bucket.dest_is_output,
             dest_address_type: self.transform_address_type(&bucket.dest_address_type),
-            dest: self.transform_location_rule(&bucket.dest),
+            dest: self.transform_location_rule(&bucket.id, &bucket.dest),
             src: self.transform_instruction(&bucket.src),
             bounded_fn: bucket.bounded_fn.clone(),
         }
@@ -238,18 +244,18 @@ pub trait CircuitTransformationPass {
         .allocate()
     }
 
-    fn transform_final_data(&self, final_data: &FinalData) -> FinalData {
+    fn transform_final_data(&self, bucket_id: &BucketId, final_data: &FinalData) -> FinalData {
         FinalData {
             context: final_data.context,
             dest_is_output: final_data.dest_is_output,
             dest_address_type: self.transform_address_type(&final_data.dest_address_type),
-            dest: self.transform_location_rule(&final_data.dest),
+            dest: self.transform_location_rule(bucket_id, &final_data.dest),
         }
     }
 
-    fn transform_return_type(&self, return_type: &ReturnType) -> ReturnType {
+    fn transform_return_type(&self, bucket_id: &BucketId, return_type: &ReturnType) -> ReturnType {
         match return_type {
-            ReturnType::Final(f) => ReturnType::Final(self.transform_final_data(f)),
+            ReturnType::Final(f) => ReturnType::Final(self.transform_final_data(bucket_id, f)),
             x => x.clone(),
         }
     }
@@ -264,7 +270,7 @@ pub trait CircuitTransformationPass {
             argument_types: bucket.argument_types.clone(),
             arguments: self.transform_instructions(&bucket.arguments),
             arena_size: bucket.arena_size,
-            return_info: self.transform_return_type(&bucket.return_info),
+            return_info: self.transform_return_type(&bucket.id, &bucket.return_info),
         }
         .allocate()
     }
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index d5317ab21..8d83f1d6c 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -160,7 +160,7 @@ impl CircuitTransformationPass for SimplificationPass<'_> {
             argument_types: bucket.argument_types.clone(),
             arguments: self.transform_instructions(&bucket.arguments),
             arena_size: bucket.arena_size,
-            return_info: self.transform_return_type(&bucket.return_info),
+            return_info: self.transform_return_type(&bucket.id, &bucket.return_info),
         }
         .allocate()
     }
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index e60a75274..6cce4821f 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -290,7 +290,7 @@ impl CircuitTransformationPass for UnknownIndexSanitizationPass<'_> {
             line: bucket.line,
             message_id: bucket.message_id,
             address_type: self.transform_address_type(&bucket.address_type),
-            src: self.transform_location_rule(&bucket.src),
+            src: self.transform_location_rule(&bucket.id, &bucket.src),
             bounded_fn: bounded_fn_symbol,
         }
         .allocate()
@@ -309,7 +309,7 @@ impl CircuitTransformationPass for UnknownIndexSanitizationPass<'_> {
             context: bucket.context.clone(),
             dest_is_output: bucket.dest_is_output,
             dest_address_type: self.transform_address_type(&bucket.dest_address_type),
-            dest: self.transform_location_rule(&bucket.dest),
+            dest: self.transform_location_rule(&bucket.id, &bucket.dest),
             src: self.transform_instruction(&bucket.src),
             bounded_fn: bounded_fn_symbol,
         }
diff --git a/compiler/src/intermediate_representation/location_rule.rs b/compiler/src/intermediate_representation/location_rule.rs
index 6bc7f2637..7a50a4266 100644
--- a/compiler/src/intermediate_representation/location_rule.rs
+++ b/compiler/src/intermediate_representation/location_rule.rs
@@ -65,7 +65,7 @@ impl WriteLLVMIR for LocationRule {
     fn produce_llvm_ir<'a, 'b>(&self, producer: &'b dyn LLVMIRProducer<'a>) -> Option<LLVMInstruction<'a>> {
         match self {
             LocationRule::Indexed { location, .. } => location.produce_llvm_ir(producer),
-            LocationRule::Mapped { .. } => unreachable!() // We should not produce mapped location rules for Vanguard!
+            LocationRule::Mapped { .. } => unreachable!("LocationRule::Mapped should have been replaced"),
         }
     }
 }
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index fa0e8def6..2e76b63fd 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -215,24 +215,19 @@ impl StoreBucket {
             if let InputInformation::Input { status } = input_information {
                 let sub_cmp_name = match &dest {
                     LocationRule::Indexed { template_header, .. } => template_header.clone(),
-                    LocationRule::Mapped { .. } => None,
-                };
+                    LocationRule::Mapped { .. } => unreachable!("LocationRule::Mapped should have been replaced"),
+                }.expect("Could not get the name of the subcomponent");
                 match status {
                     StatusInput::Last => {
-                        let run_fn = run_fn_name(
-                            sub_cmp_name.expect("Could not get the name of the subcomponent"),
-                        );
                         // If we reach this point gep is the address of the subcomponent so we can just reuse it
                         let addr = cmp_address
                             .produce_llvm_ir(producer)
                             .expect("The address of a subcomponent must yield a value!");
                         let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_call(producer, run_fn.as_str(), &[subcmp.into()]);
+                        create_call(producer, run_fn_name(sub_cmp_name).as_str(), &[subcmp.into()]);
                     }
                     StatusInput::Unknown => {
                         panic!("There should not be Unknown input status");
-                        // let sub_cmp_name = sub_cmp_name.expect("Could not get the name of the subcomponent");
-                        // let run_fn = run_fn_name(sub_cmp_name.clone());
                         // let current_function = producer.current_function();
                         // let run_bb = create_bb(producer, current_function, format!("maybe_run.{}", sub_cmp_name).as_str());
                         // let continue_bb = create_bb(producer, current_function,"continue.store");
@@ -248,7 +243,7 @@ impl StoreBucket {
                         // let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
                         // let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                         //
-                        // create_call(producer, run_fn.as_str(), &[subcmp.into()]);
+                        // create_call(producer, run_fn_name(sub_cmp_name).as_str(), &[subcmp.into()]);
                         // create_br(producer,continue_bb);
                         // producer.set_current_bb(continue_bb);
                     }

From 56aec48866a6b24a049402132ff5f48b317c9a23 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 4 Oct 2023 16:12:19 -0500
Subject: [PATCH 11/22] fix XFAIL test outputs so they pass

---
 circom/tests/subcmps/mapped.circom  | 405 +++++++++++++++++++-
 circom/tests/subcmps/mapped2.circom | 554 +++++++++++++++++++++++++++-
 circom/tests/subcmps/mapped3.circom | 146 +++++++-
 circom/tests/subcmps/mapped4.circom | 222 ++++++++++-
 4 files changed, 1293 insertions(+), 34 deletions(-)

diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index 7726d39ed..8e16e7307 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -40,5 +40,406 @@ template B(n) {
 
 component main = B(2);
 
-//TODO: This check is a filler just to capture when the test no longer crashes
-//CHECK: declare void @llvm.donothing()
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %4, align 4
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %call.fr_mul, i256 %5, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 1)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %4, align 4
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %call.fr_mul, i256 %5, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 1)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @A_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 8
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 9
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0, i256* %9, i256* %10, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 6
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 10
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %13, i256* %14, i256* %15)
+//CHECK-NEXT:   %16 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 7
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 11
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %16, [0 x i256]* %0, i256* %17, i256* %18, i256* %19)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @A_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %8, [0 x i256]* %0, i256* %9, i256* %10, i256* %11)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @B_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @A_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %create_cmp3
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp3:
+//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @A_1_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop6
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop6:
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %8 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %9 = load [0 x i256]*, [0 x i256]** %8, align 8
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i32 4
+//CHECK-NEXT:   store i256 %7, i256* %10, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter = load i32, i32* %11, align 4
+//CHECK-NEXT:   %decrement.counter = sub i32 %load.subcmp.counter, 1
+//CHECK-NEXT:   store i32 %decrement.counter, i32* %11, align 4
+//CHECK-NEXT:   %12 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %7, i256 %12, i1* %constraint)
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 6
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %15 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %16 = load [0 x i256]*, [0 x i256]** %15, align 8
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %16, i32 0, i32 8
+//CHECK-NEXT:   store i256 %14, i256* %17, align 4
+//CHECK-NEXT:   %18 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter1 = load i32, i32* %18, align 4
+//CHECK-NEXT:   %decrement.counter2 = sub i32 %load.subcmp.counter1, 1
+//CHECK-NEXT:   store i32 %decrement.counter2, i32* %18, align 4
+//CHECK-NEXT:   %19 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %constraint3 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %14, i256 %19, i1* %constraint3)
+//CHECK-NEXT:   %20 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %20, align 4
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
+//CHECK-NEXT:   %22 = load i256, i256* %21, align 4
+//CHECK-NEXT:   %23 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %24 = load [0 x i256]*, [0 x i256]** %23, align 8
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %24, i32 0, i32 5
+//CHECK-NEXT:   store i256 %22, i256* %25, align 4
+//CHECK-NEXT:   %26 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter4 = load i32, i32* %26, align 4
+//CHECK-NEXT:   %decrement.counter5 = sub i32 %load.subcmp.counter4, 1
+//CHECK-NEXT:   store i32 %decrement.counter5, i32* %26, align 4
+//CHECK-NEXT:   %27 = load i256, i256* %25, align 4
+//CHECK-NEXT:   %constraint6 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %22, i256 %27, i1* %constraint6)
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 7
+//CHECK-NEXT:   %29 = load i256, i256* %28, align 4
+//CHECK-NEXT:   %30 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %31 = load [0 x i256]*, [0 x i256]** %30, align 8
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %31, i32 0, i32 9
+//CHECK-NEXT:   store i256 %29, i256* %32, align 4
+//CHECK-NEXT:   %33 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter7 = load i32, i32* %33, align 4
+//CHECK-NEXT:   %decrement.counter8 = sub i32 %load.subcmp.counter7, 1
+//CHECK-NEXT:   store i32 %decrement.counter8, i32* %33, align 4
+//CHECK-NEXT:   %34 = load i256, i256* %32, align 4
+//CHECK-NEXT:   %constraint9 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %29, i256 %34, i1* %constraint9)
+//CHECK-NEXT:   %35 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %35, align 4
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
+//CHECK-NEXT:   %37 = load i256, i256* %36, align 4
+//CHECK-NEXT:   %38 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %39 = load [0 x i256]*, [0 x i256]** %38, align 8
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0, i32 6
+//CHECK-NEXT:   store i256 %37, i256* %40, align 4
+//CHECK-NEXT:   %41 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter10 = load i32, i32* %41, align 4
+//CHECK-NEXT:   %decrement.counter11 = sub i32 %load.subcmp.counter10, 1
+//CHECK-NEXT:   store i32 %decrement.counter11, i32* %41, align 4
+//CHECK-NEXT:   %42 = load i256, i256* %40, align 4
+//CHECK-NEXT:   %constraint12 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %37, i256 %42, i1* %constraint12)
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 8
+//CHECK-NEXT:   %44 = load i256, i256* %43, align 4
+//CHECK-NEXT:   %45 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %46 = load [0 x i256]*, [0 x i256]** %45, align 8
+//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i32 10
+//CHECK-NEXT:   store i256 %44, i256* %47, align 4
+//CHECK-NEXT:   %48 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter13 = load i32, i32* %48, align 4
+//CHECK-NEXT:   %decrement.counter14 = sub i32 %load.subcmp.counter13, 1
+//CHECK-NEXT:   store i32 %decrement.counter14, i32* %48, align 4
+//CHECK-NEXT:   %49 = load i256, i256* %47, align 4
+//CHECK-NEXT:   %constraint15 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %44, i256 %49, i1* %constraint15)
+//CHECK-NEXT:   %50 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 3, i256* %50, align 4
+//CHECK-NEXT:   %51 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 5
+//CHECK-NEXT:   %52 = load i256, i256* %51, align 4
+//CHECK-NEXT:   %53 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %54 = load [0 x i256]*, [0 x i256]** %53, align 8
+//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %54, i32 0, i32 7
+//CHECK-NEXT:   store i256 %52, i256* %55, align 4
+//CHECK-NEXT:   %56 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter16 = load i32, i32* %56, align 4
+//CHECK-NEXT:   %decrement.counter17 = sub i32 %load.subcmp.counter16, 1
+//CHECK-NEXT:   store i32 %decrement.counter17, i32* %56, align 4
+//CHECK-NEXT:   %57 = load i256, i256* %55, align 4
+//CHECK-NEXT:   %constraint18 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %52, i256 %57, i1* %constraint18)
+//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 9
+//CHECK-NEXT:   %59 = load i256, i256* %58, align 4
+//CHECK-NEXT:   %60 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %61 = load [0 x i256]*, [0 x i256]** %60, align 8
+//CHECK-NEXT:   %62 = getelementptr [0 x i256], [0 x i256]* %61, i32 0, i32 11
+//CHECK-NEXT:   store i256 %59, i256* %62, align 4
+//CHECK-NEXT:   %63 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter19 = load i32, i32* %63, align 4
+//CHECK-NEXT:   %decrement.counter20 = sub i32 %load.subcmp.counter19, 1
+//CHECK-NEXT:   store i32 %decrement.counter20, i32* %63, align 4
+//CHECK-NEXT:   %64 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %65 = load [0 x i256]*, [0 x i256]** %64, align 8
+//CHECK-NEXT:   call void @A_0_run([0 x i256]* %65)
+//CHECK-NEXT:   %66 = load i256, i256* %62, align 4
+//CHECK-NEXT:   %constraint21 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %59, i256 %66, i1* %constraint21)
+//CHECK-NEXT:   %67 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 4, i256* %67, align 4
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %68 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %68, align 4
+//CHECK-NEXT:   br label %unrolled_loop8
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop8:
+//CHECK-NEXT:   %69 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %70 = load [0 x i256]*, [0 x i256]** %69, align 8
+//CHECK-NEXT:   %71 = getelementptr [0 x i256], [0 x i256]* %70, i32 0, i32 0
+//CHECK-NEXT:   %72 = load i256, i256* %71, align 4
+//CHECK-NEXT:   %73 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %74 = load [0 x i256]*, [0 x i256]** %73, align 8
+//CHECK-NEXT:   %75 = getelementptr [0 x i256], [0 x i256]* %74, i32 0, i32 2
+//CHECK-NEXT:   store i256 %72, i256* %75, align 4
+//CHECK-NEXT:   %76 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter22 = load i32, i32* %76, align 4
+//CHECK-NEXT:   %decrement.counter23 = sub i32 %load.subcmp.counter22, 1
+//CHECK-NEXT:   store i32 %decrement.counter23, i32* %76, align 4
+//CHECK-NEXT:   %77 = load i256, i256* %75, align 4
+//CHECK-NEXT:   %constraint24 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %72, i256 %77, i1* %constraint24)
+//CHECK-NEXT:   %78 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %79 = load [0 x i256]*, [0 x i256]** %78, align 8
+//CHECK-NEXT:   %80 = getelementptr [0 x i256], [0 x i256]* %79, i32 0, i32 2
+//CHECK-NEXT:   %81 = load i256, i256* %80, align 4
+//CHECK-NEXT:   %82 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %83 = load [0 x i256]*, [0 x i256]** %82, align 8
+//CHECK-NEXT:   %84 = getelementptr [0 x i256], [0 x i256]* %83, i32 0, i32 4
+//CHECK-NEXT:   store i256 %81, i256* %84, align 4
+//CHECK-NEXT:   %85 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter25 = load i32, i32* %85, align 4
+//CHECK-NEXT:   %decrement.counter26 = sub i32 %load.subcmp.counter25, 1
+//CHECK-NEXT:   store i32 %decrement.counter26, i32* %85, align 4
+//CHECK-NEXT:   %86 = load i256, i256* %84, align 4
+//CHECK-NEXT:   %constraint27 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %81, i256 %86, i1* %constraint27)
+//CHECK-NEXT:   %87 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %87, align 4
+//CHECK-NEXT:   %88 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %89 = load [0 x i256]*, [0 x i256]** %88, align 8
+//CHECK-NEXT:   %90 = getelementptr [0 x i256], [0 x i256]* %89, i32 0, i32 1
+//CHECK-NEXT:   %91 = load i256, i256* %90, align 4
+//CHECK-NEXT:   %92 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %93 = load [0 x i256]*, [0 x i256]** %92, align 8
+//CHECK-NEXT:   %94 = getelementptr [0 x i256], [0 x i256]* %93, i32 0, i32 3
+//CHECK-NEXT:   store i256 %91, i256* %94, align 4
+//CHECK-NEXT:   %95 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter28 = load i32, i32* %95, align 4
+//CHECK-NEXT:   %decrement.counter29 = sub i32 %load.subcmp.counter28, 1
+//CHECK-NEXT:   store i32 %decrement.counter29, i32* %95, align 4
+//CHECK-NEXT:   %96 = load i256, i256* %94, align 4
+//CHECK-NEXT:   %constraint30 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %91, i256 %96, i1* %constraint30)
+//CHECK-NEXT:   %97 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %98 = load [0 x i256]*, [0 x i256]** %97, align 8
+//CHECK-NEXT:   %99 = getelementptr [0 x i256], [0 x i256]* %98, i32 0, i32 3
+//CHECK-NEXT:   %100 = load i256, i256* %99, align 4
+//CHECK-NEXT:   %101 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %102 = load [0 x i256]*, [0 x i256]** %101, align 8
+//CHECK-NEXT:   %103 = getelementptr [0 x i256], [0 x i256]* %102, i32 0, i32 5
+//CHECK-NEXT:   store i256 %100, i256* %103, align 4
+//CHECK-NEXT:   %104 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter31 = load i32, i32* %104, align 4
+//CHECK-NEXT:   %decrement.counter32 = sub i32 %load.subcmp.counter31, 1
+//CHECK-NEXT:   store i32 %decrement.counter32, i32* %104, align 4
+//CHECK-NEXT:   %105 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %106 = load [0 x i256]*, [0 x i256]** %105, align 8
+//CHECK-NEXT:   call void @A_1_run([0 x i256]* %106)
+//CHECK-NEXT:   %107 = load i256, i256* %103, align 4
+//CHECK-NEXT:   %constraint33 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %100, i256 %107, i1* %constraint33)
+//CHECK-NEXT:   %108 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %108, align 4
+//CHECK-NEXT:   br label %store9
+//CHECK-EMPTY: 
+//CHECK-NEXT: store9:
+//CHECK-NEXT:   %109 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %109, align 4
+//CHECK-NEXT:   br label %unrolled_loop10
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop10:
+//CHECK-NEXT:   %110 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %111 = load [0 x i256]*, [0 x i256]** %110, align 8
+//CHECK-NEXT:   %112 = getelementptr [0 x i256], [0 x i256]* %111, i32 0, i32 0
+//CHECK-NEXT:   %113 = load i256, i256* %112, align 4
+//CHECK-NEXT:   %114 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %113, i256* %114, align 4
+//CHECK-NEXT:   %115 = load i256, i256* %114, align 4
+//CHECK-NEXT:   %constraint34 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %113, i256 %115, i1* %constraint34)
+//CHECK-NEXT:   %116 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %116, align 4
+//CHECK-NEXT:   %117 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %118 = load [0 x i256]*, [0 x i256]** %117, align 8
+//CHECK-NEXT:   %119 = getelementptr [0 x i256], [0 x i256]* %118, i32 0, i32 1
+//CHECK-NEXT:   %120 = load i256, i256* %119, align 4
+//CHECK-NEXT:   %121 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   store i256 %120, i256* %121, align 4
+//CHECK-NEXT:   %122 = load i256, i256* %121, align 4
+//CHECK-NEXT:   %constraint35 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %120, i256 %122, i1* %constraint35)
+//CHECK-NEXT:   %123 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %123, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index dba58ffe6..0e1670a01 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -57,5 +57,555 @@ template B(n, m, j) {
 
 component main = B(2, 3, 2);
 
-//TODO: This check is a filler just to capture when the test no longer crashes
-//CHECK: declare void @llvm.donothing()
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0, i256* %fix_1){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_1, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %1, i256 2)
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %call.fr_mul, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 1)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0, i256* %fix_1){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_1, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %1, i256 2)
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %call.fr_mul, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 1)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @A_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6)
+//CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* %9)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @A_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6)
+//CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @B_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [5 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [4 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 3, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 2, i256* %3, align 4
+//CHECK-NEXT:   br label %create_cmp4
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp4:
+//CHECK-NEXT:   %4 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @A_0_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   br label %create_cmp5
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp5:
+//CHECK-NEXT:   %5 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @A_0_build({ [0 x i256]*, i32 }* %5)
+//CHECK-NEXT:   br label %create_cmp6
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp6:
+//CHECK-NEXT:   %6 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
+//CHECK-NEXT:   call void @A_1_build({ [0 x i256]*, i32 }* %6)
+//CHECK-NEXT:   br label %create_cmp7
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp7:
+//CHECK-NEXT:   %7 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3
+//CHECK-NEXT:   call void @A_1_build({ [0 x i256]*, i32 }* %7)
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %8 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %8, align 4
+//CHECK-NEXT:   br label %store9
+//CHECK-EMPTY: 
+//CHECK-NEXT: store9:
+//CHECK-NEXT:   %9 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   br label %store10
+//CHECK-EMPTY: 
+//CHECK-NEXT: store10:
+//CHECK-NEXT:   %10 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %10, align 4
+//CHECK-NEXT:   br label %unrolled_loop11
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop11:
+//CHECK-NEXT:   %11 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %11, align 4
+//CHECK-NEXT:   %12 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %12, align 4
+//CHECK-NEXT:   br label %store12
+//CHECK-EMPTY: 
+//CHECK-NEXT: store12:
+//CHECK-NEXT:   %13 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %13, align 4
+//CHECK-NEXT:   br label %unrolled_loop13
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop13:
+//CHECK-NEXT:   %14 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 10
+//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
+//CHECK-NEXT:   %17 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %18 = load [0 x i256]*, [0 x i256]** %17, align 8
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i32 2
+//CHECK-NEXT:   store i256 %16, i256* %19, align 4
+//CHECK-NEXT:   %20 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter = load i32, i32* %20, align 4
+//CHECK-NEXT:   %decrement.counter = sub i32 %load.subcmp.counter, 1
+//CHECK-NEXT:   store i32 %decrement.counter, i32* %20, align 4
+//CHECK-NEXT:   %21 = load i256, i256* %19, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %16, i256 %21, i1* %constraint)
+//CHECK-NEXT:   %22 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %22, align 4
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 11
+//CHECK-NEXT:   %24 = load i256, i256* %23, align 4
+//CHECK-NEXT:   %25 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %26 = load [0 x i256]*, [0 x i256]** %25, align 8
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %26, i32 0, i32 2
+//CHECK-NEXT:   store i256 %24, i256* %27, align 4
+//CHECK-NEXT:   %28 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter1 = load i32, i32* %28, align 4
+//CHECK-NEXT:   %decrement.counter2 = sub i32 %load.subcmp.counter1, 1
+//CHECK-NEXT:   store i32 %decrement.counter2, i32* %28, align 4
+//CHECK-NEXT:   %29 = load i256, i256* %27, align 4
+//CHECK-NEXT:   %constraint3 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %24, i256 %29, i1* %constraint3)
+//CHECK-NEXT:   %30 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %30, align 4
+//CHECK-NEXT:   %31 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %31, align 4
+//CHECK-NEXT:   %32 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %32, align 4
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 12
+//CHECK-NEXT:   %34 = load i256, i256* %33, align 4
+//CHECK-NEXT:   %35 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %36 = load [0 x i256]*, [0 x i256]** %35, align 8
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %36, i32 0, i32 3
+//CHECK-NEXT:   store i256 %34, i256* %37, align 4
+//CHECK-NEXT:   %38 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter4 = load i32, i32* %38, align 4
+//CHECK-NEXT:   %decrement.counter5 = sub i32 %load.subcmp.counter4, 1
+//CHECK-NEXT:   store i32 %decrement.counter5, i32* %38, align 4
+//CHECK-NEXT:   %39 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %40 = load [0 x i256]*, [0 x i256]** %39, align 8
+//CHECK-NEXT:   call void @A_0_run([0 x i256]* %40)
+//CHECK-NEXT:   %41 = load i256, i256* %37, align 4
+//CHECK-NEXT:   %constraint6 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %34, i256 %41, i1* %constraint6)
+//CHECK-NEXT:   %42 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %42, align 4
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 13
+//CHECK-NEXT:   %44 = load i256, i256* %43, align 4
+//CHECK-NEXT:   %45 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %46 = load [0 x i256]*, [0 x i256]** %45, align 8
+//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i32 3
+//CHECK-NEXT:   store i256 %44, i256* %47, align 4
+//CHECK-NEXT:   %48 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter7 = load i32, i32* %48, align 4
+//CHECK-NEXT:   %decrement.counter8 = sub i32 %load.subcmp.counter7, 1
+//CHECK-NEXT:   store i32 %decrement.counter8, i32* %48, align 4
+//CHECK-NEXT:   %49 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %50 = load [0 x i256]*, [0 x i256]** %49, align 8
+//CHECK-NEXT:   call void @A_0_run([0 x i256]* %50)
+//CHECK-NEXT:   %51 = load i256, i256* %47, align 4
+//CHECK-NEXT:   %constraint9 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %44, i256 %51, i1* %constraint9)
+//CHECK-NEXT:   %52 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %52, align 4
+//CHECK-NEXT:   %53 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 2, i256* %53, align 4
+//CHECK-NEXT:   br label %store14
+//CHECK-EMPTY: 
+//CHECK-NEXT: store14:
+//CHECK-NEXT:   %54 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %54, align 4
+//CHECK-NEXT:   br label %unrolled_loop15
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop15:
+//CHECK-NEXT:   %55 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %55, align 4
+//CHECK-NEXT:   %56 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %56, align 4
+//CHECK-NEXT:   br label %store16
+//CHECK-EMPTY: 
+//CHECK-NEXT: store16:
+//CHECK-NEXT:   %57 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %57, align 4
+//CHECK-NEXT:   br label %unrolled_loop17
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop17:
+//CHECK-NEXT:   %58 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %58, align 4
+//CHECK-NEXT:   %59 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 14
+//CHECK-NEXT:   %60 = load i256, i256* %59, align 4
+//CHECK-NEXT:   %61 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %62 = load [0 x i256]*, [0 x i256]** %61, align 8
+//CHECK-NEXT:   %63 = getelementptr [0 x i256], [0 x i256]* %62, i32 0, i32 3
+//CHECK-NEXT:   store i256 %60, i256* %63, align 4
+//CHECK-NEXT:   %64 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %load.subcmp.counter10 = load i32, i32* %64, align 4
+//CHECK-NEXT:   %decrement.counter11 = sub i32 %load.subcmp.counter10, 1
+//CHECK-NEXT:   store i32 %decrement.counter11, i32* %64, align 4
+//CHECK-NEXT:   %65 = load i256, i256* %63, align 4
+//CHECK-NEXT:   %constraint12 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %60, i256 %65, i1* %constraint12)
+//CHECK-NEXT:   %66 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %66, align 4
+//CHECK-NEXT:   %67 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 15
+//CHECK-NEXT:   %68 = load i256, i256* %67, align 4
+//CHECK-NEXT:   %69 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %70 = load [0 x i256]*, [0 x i256]** %69, align 8
+//CHECK-NEXT:   %71 = getelementptr [0 x i256], [0 x i256]* %70, i32 0, i32 3
+//CHECK-NEXT:   store i256 %68, i256* %71, align 4
+//CHECK-NEXT:   %72 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 1
+//CHECK-NEXT:   %load.subcmp.counter13 = load i32, i32* %72, align 4
+//CHECK-NEXT:   %decrement.counter14 = sub i32 %load.subcmp.counter13, 1
+//CHECK-NEXT:   store i32 %decrement.counter14, i32* %72, align 4
+//CHECK-NEXT:   %73 = load i256, i256* %71, align 4
+//CHECK-NEXT:   %constraint15 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %68, i256 %73, i1* %constraint15)
+//CHECK-NEXT:   %74 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %74, align 4
+//CHECK-NEXT:   %75 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %75, align 4
+//CHECK-NEXT:   %76 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %76, align 4
+//CHECK-NEXT:   %77 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 16
+//CHECK-NEXT:   %78 = load i256, i256* %77, align 4
+//CHECK-NEXT:   %79 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %80 = load [0 x i256]*, [0 x i256]** %79, align 8
+//CHECK-NEXT:   %81 = getelementptr [0 x i256], [0 x i256]* %80, i32 0, i32 4
+//CHECK-NEXT:   store i256 %78, i256* %81, align 4
+//CHECK-NEXT:   %82 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %load.subcmp.counter16 = load i32, i32* %82, align 4
+//CHECK-NEXT:   %decrement.counter17 = sub i32 %load.subcmp.counter16, 1
+//CHECK-NEXT:   store i32 %decrement.counter17, i32* %82, align 4
+//CHECK-NEXT:   %83 = load i256, i256* %81, align 4
+//CHECK-NEXT:   %constraint18 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %78, i256 %83, i1* %constraint18)
+//CHECK-NEXT:   %84 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %84, align 4
+//CHECK-NEXT:   %85 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 17
+//CHECK-NEXT:   %86 = load i256, i256* %85, align 4
+//CHECK-NEXT:   %87 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %88 = load [0 x i256]*, [0 x i256]** %87, align 8
+//CHECK-NEXT:   %89 = getelementptr [0 x i256], [0 x i256]* %88, i32 0, i32 4
+//CHECK-NEXT:   store i256 %86, i256* %89, align 4
+//CHECK-NEXT:   %90 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 1
+//CHECK-NEXT:   %load.subcmp.counter19 = load i32, i32* %90, align 4
+//CHECK-NEXT:   %decrement.counter20 = sub i32 %load.subcmp.counter19, 1
+//CHECK-NEXT:   store i32 %decrement.counter20, i32* %90, align 4
+//CHECK-NEXT:   %91 = load i256, i256* %89, align 4
+//CHECK-NEXT:   %constraint21 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %86, i256 %91, i1* %constraint21)
+//CHECK-NEXT:   %92 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %92, align 4
+//CHECK-NEXT:   %93 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 2, i256* %93, align 4
+//CHECK-NEXT:   %94 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %94, align 4
+//CHECK-NEXT:   %95 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 18
+//CHECK-NEXT:   %96 = load i256, i256* %95, align 4
+//CHECK-NEXT:   %97 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %98 = load [0 x i256]*, [0 x i256]** %97, align 8
+//CHECK-NEXT:   %99 = getelementptr [0 x i256], [0 x i256]* %98, i32 0, i32 5
+//CHECK-NEXT:   store i256 %96, i256* %99, align 4
+//CHECK-NEXT:   %100 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %load.subcmp.counter22 = load i32, i32* %100, align 4
+//CHECK-NEXT:   %decrement.counter23 = sub i32 %load.subcmp.counter22, 1
+//CHECK-NEXT:   store i32 %decrement.counter23, i32* %100, align 4
+//CHECK-NEXT:   %101 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %102 = load [0 x i256]*, [0 x i256]** %101, align 8
+//CHECK-NEXT:   call void @A_1_run([0 x i256]* %102)
+//CHECK-NEXT:   %103 = load i256, i256* %99, align 4
+//CHECK-NEXT:   %constraint24 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %96, i256 %103, i1* %constraint24)
+//CHECK-NEXT:   %104 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %104, align 4
+//CHECK-NEXT:   %105 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 19
+//CHECK-NEXT:   %106 = load i256, i256* %105, align 4
+//CHECK-NEXT:   %107 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %108 = load [0 x i256]*, [0 x i256]** %107, align 8
+//CHECK-NEXT:   %109 = getelementptr [0 x i256], [0 x i256]* %108, i32 0, i32 5
+//CHECK-NEXT:   store i256 %106, i256* %109, align 4
+//CHECK-NEXT:   %110 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 1
+//CHECK-NEXT:   %load.subcmp.counter25 = load i32, i32* %110, align 4
+//CHECK-NEXT:   %decrement.counter26 = sub i32 %load.subcmp.counter25, 1
+//CHECK-NEXT:   store i32 %decrement.counter26, i32* %110, align 4
+//CHECK-NEXT:   %111 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %112 = load [0 x i256]*, [0 x i256]** %111, align 8
+//CHECK-NEXT:   call void @A_1_run([0 x i256]* %112)
+//CHECK-NEXT:   %113 = load i256, i256* %109, align 4
+//CHECK-NEXT:   %constraint27 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %106, i256 %113, i1* %constraint27)
+//CHECK-NEXT:   %114 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %114, align 4
+//CHECK-NEXT:   %115 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 3, i256* %115, align 4
+//CHECK-NEXT:   br label %store18
+//CHECK-EMPTY: 
+//CHECK-NEXT: store18:
+//CHECK-NEXT:   %116 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %116, align 4
+//CHECK-NEXT:   br label %unrolled_loop19
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop19:
+//CHECK-NEXT:   %117 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %117, align 4
+//CHECK-NEXT:   %118 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %119 = load [0 x i256]*, [0 x i256]** %118, align 8
+//CHECK-NEXT:   %120 = getelementptr [0 x i256], [0 x i256]* %119, i32 0, i32 0
+//CHECK-NEXT:   %121 = load i256, i256* %120, align 4
+//CHECK-NEXT:   %122 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %121, i256* %122, align 4
+//CHECK-NEXT:   %123 = load i256, i256* %122, align 4
+//CHECK-NEXT:   %constraint28 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %121, i256 %123, i1* %constraint28)
+//CHECK-NEXT:   %124 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %124, align 4
+//CHECK-NEXT:   %125 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %126 = load [0 x i256]*, [0 x i256]** %125, align 8
+//CHECK-NEXT:   %127 = getelementptr [0 x i256], [0 x i256]* %126, i32 0, i32 0
+//CHECK-NEXT:   %128 = load i256, i256* %127, align 4
+//CHECK-NEXT:   %129 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   store i256 %128, i256* %129, align 4
+//CHECK-NEXT:   %130 = load i256, i256* %129, align 4
+//CHECK-NEXT:   %constraint29 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %128, i256 %130, i1* %constraint29)
+//CHECK-NEXT:   %131 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %131, align 4
+//CHECK-NEXT:   %132 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %132, align 4
+//CHECK-NEXT:   %133 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %133, align 4
+//CHECK-NEXT:   %134 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %135 = load [0 x i256]*, [0 x i256]** %134, align 8
+//CHECK-NEXT:   %136 = getelementptr [0 x i256], [0 x i256]* %135, i32 0, i32 1
+//CHECK-NEXT:   %137 = load i256, i256* %136, align 4
+//CHECK-NEXT:   %138 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 %137, i256* %138, align 4
+//CHECK-NEXT:   %139 = load i256, i256* %138, align 4
+//CHECK-NEXT:   %constraint30 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %137, i256 %139, i1* %constraint30)
+//CHECK-NEXT:   %140 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %140, align 4
+//CHECK-NEXT:   %141 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %142 = load [0 x i256]*, [0 x i256]** %141, align 8
+//CHECK-NEXT:   %143 = getelementptr [0 x i256], [0 x i256]* %142, i32 0, i32 1
+//CHECK-NEXT:   %144 = load i256, i256* %143, align 4
+//CHECK-NEXT:   %145 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
+//CHECK-NEXT:   store i256 %144, i256* %145, align 4
+//CHECK-NEXT:   %146 = load i256, i256* %145, align 4
+//CHECK-NEXT:   %constraint31 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %144, i256 %146, i1* %constraint31)
+//CHECK-NEXT:   %147 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %147, align 4
+//CHECK-NEXT:   %148 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 2, i256* %148, align 4
+//CHECK-NEXT:   br label %store20
+//CHECK-EMPTY: 
+//CHECK-NEXT: store20:
+//CHECK-NEXT:   %149 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %149, align 4
+//CHECK-NEXT:   br label %unrolled_loop21
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop21:
+//CHECK-NEXT:   %150 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %150, align 4
+//CHECK-NEXT:   %151 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %152 = load [0 x i256]*, [0 x i256]** %151, align 8
+//CHECK-NEXT:   %153 = getelementptr [0 x i256], [0 x i256]* %152, i32 0, i32 0
+//CHECK-NEXT:   %154 = load i256, i256* %153, align 4
+//CHECK-NEXT:   %155 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
+//CHECK-NEXT:   store i256 %154, i256* %155, align 4
+//CHECK-NEXT:   %156 = load i256, i256* %155, align 4
+//CHECK-NEXT:   %constraint32 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %154, i256 %156, i1* %constraint32)
+//CHECK-NEXT:   %157 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %157, align 4
+//CHECK-NEXT:   %158 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %159 = load [0 x i256]*, [0 x i256]** %158, align 8
+//CHECK-NEXT:   %160 = getelementptr [0 x i256], [0 x i256]* %159, i32 0, i32 0
+//CHECK-NEXT:   %161 = load i256, i256* %160, align 4
+//CHECK-NEXT:   %162 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 5
+//CHECK-NEXT:   store i256 %161, i256* %162, align 4
+//CHECK-NEXT:   %163 = load i256, i256* %162, align 4
+//CHECK-NEXT:   %constraint33 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %161, i256 %163, i1* %constraint33)
+//CHECK-NEXT:   %164 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %164, align 4
+//CHECK-NEXT:   %165 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %165, align 4
+//CHECK-NEXT:   %166 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %166, align 4
+//CHECK-NEXT:   %167 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %168 = load [0 x i256]*, [0 x i256]** %167, align 8
+//CHECK-NEXT:   %169 = getelementptr [0 x i256], [0 x i256]* %168, i32 0, i32 1
+//CHECK-NEXT:   %170 = load i256, i256* %169, align 4
+//CHECK-NEXT:   %171 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 6
+//CHECK-NEXT:   store i256 %170, i256* %171, align 4
+//CHECK-NEXT:   %172 = load i256, i256* %171, align 4
+//CHECK-NEXT:   %constraint34 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %170, i256 %172, i1* %constraint34)
+//CHECK-NEXT:   %173 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %173, align 4
+//CHECK-NEXT:   %174 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %175 = load [0 x i256]*, [0 x i256]** %174, align 8
+//CHECK-NEXT:   %176 = getelementptr [0 x i256], [0 x i256]* %175, i32 0, i32 1
+//CHECK-NEXT:   %177 = load i256, i256* %176, align 4
+//CHECK-NEXT:   %178 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 7
+//CHECK-NEXT:   store i256 %177, i256* %178, align 4
+//CHECK-NEXT:   %179 = load i256, i256* %178, align 4
+//CHECK-NEXT:   %constraint35 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %177, i256 %179, i1* %constraint35)
+//CHECK-NEXT:   %180 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %180, align 4
+//CHECK-NEXT:   %181 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 2, i256* %181, align 4
+//CHECK-NEXT:   %182 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %182, align 4
+//CHECK-NEXT:   %183 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %184 = load [0 x i256]*, [0 x i256]** %183, align 8
+//CHECK-NEXT:   %185 = getelementptr [0 x i256], [0 x i256]* %184, i32 0, i32 2
+//CHECK-NEXT:   %186 = load i256, i256* %185, align 4
+//CHECK-NEXT:   %187 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 8
+//CHECK-NEXT:   store i256 %186, i256* %187, align 4
+//CHECK-NEXT:   %188 = load i256, i256* %187, align 4
+//CHECK-NEXT:   %constraint36 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %186, i256 %188, i1* %constraint36)
+//CHECK-NEXT:   %189 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %189, align 4
+//CHECK-NEXT:   %190 = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 3, i32 0
+//CHECK-NEXT:   %191 = load [0 x i256]*, [0 x i256]** %190, align 8
+//CHECK-NEXT:   %192 = getelementptr [0 x i256], [0 x i256]* %191, i32 0, i32 2
+//CHECK-NEXT:   %193 = load i256, i256* %192, align 4
+//CHECK-NEXT:   %194 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 9
+//CHECK-NEXT:   store i256 %193, i256* %194, align 4
+//CHECK-NEXT:   %195 = load i256, i256* %194, align 4
+//CHECK-NEXT:   %constraint37 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %193, i256 %195, i1* %constraint37)
+//CHECK-NEXT:   %196 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %196, align 4
+//CHECK-NEXT:   %197 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 3, i256* %197, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index 6393b7fdc..e2957eaff 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -11,12 +11,6 @@ template ArrayOp(q) {
     }
 }
 
-//CHECK-LABEL: define void @ArrayOp_{{[0-9]+}}_build
-//CHECK-SAME: ({ [0 x i256]*, i32 }* %{{.*}})
-//CHECK: alloca [30 x i256]
-//CHECK: %[[DIM_REG:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
-//CHECK: store i32 15, i32* %{{.*}}[[DIM_REG]]
-
 template Wrapper() {
     signal input inp[15];
     signal output outp;
@@ -37,14 +31,136 @@ template Wrapper() {
 
 component main = Wrapper();
 
-//CHECK-LABEL: define void @Wrapper_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %{{.*}})
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_4:[0-9]+]]:
+//
+//CHECK-LABEL: define void @ArrayOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @ArrayOp_
+//CHECK-SAME: [[$RUN_1:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_1]](
+//
+//CHECK-LABEL: define void @ArrayOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+
+//CHECK-LABEL: define void @ArrayOp_
+//CHECK-SAME: [[$RUN_2:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_2]](
+//
+//CHECK-LABEL: define void @ArrayOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @ArrayOp_
+//CHECK-SAME: [[$RUN_3:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_3]](
+//
+//CHECK-LABEL: define void @ArrayOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @ArrayOp_
+//CHECK-SAME: [[$RUN_4:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_4]](
+//
+//CHECK-LABEL: define void @Wrapper_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [16 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @Wrapper_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
 //CHECK: %lvars = alloca [2 x i256]
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK: call void @ArrayOp_[[$RUN_1]]_run([0 x i256]* %
+//CHECK: call void @ArrayOp_[[$RUN_2]]_run([0 x i256]* %
+//CHECK: call void @ArrayOp_[[$RUN_3]]_run([0 x i256]* %
+//CHECK: call void @ArrayOp_[[$RUN_4]]_run([0 x i256]* %
 //COM: offset = (1 * (3 * 7)) + (2 * (7)) + (3) + 1 (since 0 is output) = 21 + 14 + 3 + 1 = 39
-//CHECK: store{{.*}}:{{.*}}; preds = %unrolled_loop{{.*}}
-//CHECK: %[[SUB_PTR:.*]] = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUB_PTR]]
-//CHECK: %[[VAL_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 3
-//CHECK: %[[VAL:.*]] = load i256, i256* %[[VAL_PTR]]
-//CHECK: %[[OUTP_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK: store i256 %[[VAL]], i256* %[[OUTP_PTR]]
+//CHECK: store{{[0-9]+}}:{{ +}}; preds = %unrolled_loop{{[0-9]+}}
+//CHECK-NEXT: %[[SUB_PTR:.*]] = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUB_PTR]]
+//CHECK-NEXT: %[[VAL_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 3
+//CHECK-NEXT: %[[VAL:.*]] = load i256, i256* %[[VAL_PTR]]
+//CHECK-NEXT: %[[OUTP_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT: store i256 %[[VAL]], i256* %[[OUTP_PTR]]
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index 7312cbc80..a0a771958 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -13,12 +13,6 @@ template MatrixOp(q) {
     }
 }
 
-//CHECK-LABEL: define void @MatrixOp_{{[0-9]+}}_build
-//CHECK-SAME: ({ [0 x i256]*, i32 }* %{{.*}})
-//CHECK: alloca [16 x i256]
-//CHECK: %[[DIM_REG:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
-//CHECK: store i32 15, i32* %{{.*}}[[DIM_REG]]
-
 template Wrapper() {
     signal input inp[5][3];
     signal output outp;
@@ -41,13 +35,211 @@ template Wrapper() {
 
 component main = Wrapper();
 
-//CHECK-LABEL: define void @Wrapper_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %{{.*}})
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_01:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_02:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_03:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_04:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_05:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_06:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_07:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_08:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_09:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_10:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_11:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_12:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_13:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_14:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_15:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_16:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_17:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_18:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_19:[0-9]+]]:
+//
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+}}([0 x i256]* %lvars, [0 x i256]* %signals,
+//CHECK-SAME: i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_20:[0-9]+]]:
+//
+//CHECK-LABEL: define void @MatrixOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @MatrixOp_
+//CHECK-SAME: [[$RUN_1:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256]
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }]
+//CHECK: call void @..generated..loop.body.[[$F_ID_01]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_01]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_01]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_02]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_02]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_02]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_03]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_03]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_03]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_04]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_04]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_04]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_05]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_05]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_05]](
+//
+//CHECK-LABEL: define void @MatrixOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @MatrixOp_
+//CHECK-SAME: [[$RUN_2:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256]
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }]
+//CHECK: call void @..generated..loop.body.[[$F_ID_06]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_06]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_06]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_07]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_07]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_07]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_08]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_08]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_08]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_09]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_09]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_09]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_10]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_10]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_10]](
+//
+//CHECK-LABEL: define void @MatrixOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @MatrixOp_
+//CHECK-SAME: [[$RUN_3:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256]
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }]
+//CHECK: call void @..generated..loop.body.[[$F_ID_11]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_11]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_11]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_12]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_12]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_12]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_13]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_13]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_13]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_14]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_14]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_14]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_15]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_15]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_15]](
+//
+//CHECK-LABEL: define void @MatrixOp_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [30 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %[[COUNTER]]
+//
+//CHECK-LABEL: define void @MatrixOp_
+//CHECK-SAME: [[$RUN_4:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256]
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }]
+//CHECK: call void @..generated..loop.body.[[$F_ID_16]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_16]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_16]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_17]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_17]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_17]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_18]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_18]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_18]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_19]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_19]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_19]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_20]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_20]](
+//CHECK: call void @..generated..loop.body.[[$F_ID_20]](
+//
+//CHECK-LABEL: define void @Wrapper_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %[[SIGNALS:.*]] = alloca [16 x i256]
+//CHECK-NEXT:   %[[COUNTER:.*]] = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 15, i32* %{{.*}}[[COUNTER]]
+//
+//CHECK-LABEL: define void @Wrapper_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
 //CHECK: %lvars = alloca [3 x i256]
-//CHECK: store{{.*}}:{{.*}}; preds = %unrolled_loop{{.*}}
-//CHECK: %[[SUB_PTR:.*]] = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUB_PTR]]
-//CHECK: %[[VAL_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 5
-//CHECK: %[[VAL:.*]] = load i256, i256* %[[VAL_PTR]]
-//CHECK: %[[OUTP_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK: store i256 %[[VAL]], i256* %[[OUTP_PTR]]
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK: call void @MatrixOp_[[$RUN_1]]_run([0 x i256]* %
+//CHECK: call void @MatrixOp_[[$RUN_2]]_run([0 x i256]* %
+//CHECK: call void @MatrixOp_[[$RUN_3]]_run([0 x i256]* %
+//CHECK: call void @MatrixOp_[[$RUN_4]]_run([0 x i256]* %
+//CHECK: store{{[0-9]+}}:{{ +}}; preds = %unrolled_loop{{[0-9]+}}
+//CHECK-NEXT: %[[SUB_PTR:.*]] = getelementptr [4 x { [0 x i256]*, i32 }], [4 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUB_PTR]]
+//CHECK-NEXT: %[[VAL_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 5
+//CHECK-NEXT: %[[VAL:.*]] = load i256, i256* %[[VAL_PTR]]
+//CHECK-NEXT: %[[OUTP_PTR:.*]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT: store i256 %[[VAL]], i256* %[[OUTP_PTR]]

From e69fcd63f90eb128da2a29e643fc2cb72e9ccab6 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 5 Oct 2023 09:50:58 -0500
Subject: [PATCH 12/22] add test cases

---
 circom/tests/loops/assign_in_loop.circom   | 186 ---------------------
 circom/tests/loops/assign_in_loop_1.circom | 148 ++++++++++++++++
 circom/tests/loops/assign_in_loop_2.circom |  46 +++++
 circom/tests/loops/assign_in_loop_3.circom |  46 +++++
 4 files changed, 240 insertions(+), 186 deletions(-)
 delete mode 100644 circom/tests/loops/assign_in_loop.circom
 create mode 100644 circom/tests/loops/assign_in_loop_1.circom
 create mode 100644 circom/tests/loops/assign_in_loop_2.circom
 create mode 100644 circom/tests/loops/assign_in_loop_3.circom

diff --git a/circom/tests/loops/assign_in_loop.circom b/circom/tests/loops/assign_in_loop.circom
deleted file mode 100644
index bcfa191a6..000000000
--- a/circom/tests/loops/assign_in_loop.circom
+++ /dev/null
@@ -1,186 +0,0 @@
-pragma circom 2.0.0;
-// REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.* // panicked at 'not yet implemented', circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs:149:44 (LocationRule::Mapped case)
-
-template Inner(i) {
-    signal input in;
-    signal output out;
-    
-    out <-- (in >> i) & 1;
-}
-
-template Num2Bits(n) {
-    signal input in;
-    signal output out[n];
-    
-    component c[n];
-    for (var i = 0; i < n; i++) {
-    	c[i] = Inner(i);
-    	c[i].in <-- in;
-    	out[i] <-- c[i].out;
-    }
-}
-
-component main = Num2Bits(3);
-
-//CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]],
-//CHECK-SAME: [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X3]], i256* %subc_[[X3]]){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
-//CHECK-NEXT:   br label %store1
-//CHECK-EMPTY: 
-//CHECK-NEXT: store1:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
-//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
-//CHECK-NEXT:   store i256 %1, i256* %2, align 4
-//CHECK-NEXT:   br label %store2
-//CHECK-EMPTY: 
-//CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
-//CHECK-NEXT:   call void @Inner_?_run([0 x i256]* %sub_[[X1]])                 //TODO: which function to call depends on which iteration of the loop
-//CHECK-NEXT:   br label %store3
-//CHECK-EMPTY: 
-//CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X3]], i32 0
-//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X2]], i32 0
-//CHECK-NEXT:   store i256 %5, i256* %6, align 4
-//CHECK-NEXT:   br label %store4
-//CHECK-EMPTY: 
-//CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
-//CHECK-NEXT:   br label %return5
-//CHECK-EMPTY: 
-//CHECK-NEXT: return5:
-//CHECK-NEXT:   ret void
-//CHECK-NEXT: }
-//
-//CHECK-LABEL: define void @Inner_0_build({ [0 x i256]*, i32 }* %0){{.*}} {
-//
-//CHECK-LABEL: define void @Inner_0_run([0 x i256]* %0){{.*}} {
-//
-//CHECK-LABEL: define void @Inner_1_build({ [0 x i256]*, i32 }* %0){{.*}} {
-//
-//CHECK-LABEL: define void @Inner_1_run([0 x i256]* %0){{.*}} {
-//
-//CHECK-LABEL: define void @Inner_2_build({ [0 x i256]*, i32 }* %0){{.*}} {
-//
-//CHECK-LABEL: define void @Inner_2_run([0 x i256]* %0){{.*}} {
-//
-//CHECK-LABEL: define void @Num2Bits_3_build({ [0 x i256]*, i32 }* %0){{.*}} {
-//CHECK-NEXT: main:
-//CHECK-NEXT:   %1 = alloca [4 x i256], align 8
-//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
-//CHECK-NEXT:   store i32 1, i32* %2, align 4
-//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
-//CHECK-NEXT:   %4 = bitcast [4 x i256]* %1 to [0 x i256]*
-//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
-//CHECK-NEXT:   ret void
-//CHECK-NEXT: }
-//
-//CHECK-LABEL: define void @Num2Bits_3_run([0 x i256]* %0){{.*}} {
-//CHECK-NEXT: prelude:
-//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
-//CHECK-NEXT:   %subcmps = alloca [3 x { [0 x i256]*, i32 }], align 8
-//CHECK-NEXT:   br label %store1
-//CHECK-EMPTY: 
-//CHECK-NEXT: store1:
-//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
-//CHECK-NEXT:   store i256 3, i256* %1, align 4
-//CHECK-NEXT:   br label %create_cmp2
-//CHECK-EMPTY: 
-//CHECK-NEXT: create_cmp2:
-//CHECK-NEXT:   %2 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
-//CHECK-NEXT:   call void @Inner_0_build({ [0 x i256]*, i32 }* %2)
-//CHECK-NEXT:   br label %create_cmp3
-//CHECK-EMPTY: 
-//CHECK-NEXT: create_cmp3:
-//CHECK-NEXT:   %3 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
-//CHECK-NEXT:   call void @Inner_1_build({ [0 x i256]*, i32 }* %3)
-//CHECK-NEXT:   br label %create_cmp4
-//CHECK-EMPTY: 
-//CHECK-NEXT: create_cmp4:
-//CHECK-NEXT:   %4 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
-//CHECK-NEXT:   call void @Inner_2_build({ [0 x i256]*, i32 }* %4)
-//CHECK-NEXT:   br label %store5
-//CHECK-EMPTY: 
-//CHECK-NEXT: store5:
-//CHECK-NEXT:   %5 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 0, i256* %5, align 4
-//CHECK-NEXT:   br label %unrolled_loop6
-//CHECK-EMPTY: 
-//CHECK-NEXT: unrolled_loop6:
-//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %7 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %12 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
-//CHECK-NEXT:   %16 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
-//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
-//CHECK-NEXT:   %19 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
-//CHECK-NEXT:   %21 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
-//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
-//CHECK-NEXT:   %24 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20, [0 x i256]* %23, i256* %25)
-//CHECK-NEXT:   %26 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %27 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
-//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
-//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 1
-//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %32 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %33 = load [0 x i256]*, [0 x i256]** %32, align 8
-//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %33, i32 0
-//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0, i256 0
-//CHECK-NEXT:   %36 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %37 = load [0 x i256]*, [0 x i256]** %36, align 8
-//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0
-//CHECK-NEXT:   %39 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %40 = bitcast i32* %39 to i256*
-//CHECK-NEXT:   %41 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %42 = load [0 x i256]*, [0 x i256]** %41, align 8
-//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %42, i32 0
-//CHECK-NEXT:   %44 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %45 = bitcast i32* %44 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %26, [0 x i256]* %0, i256* %30, i256* %31, i256* %35, [0 x i256]* %38, i256* %40, [0 x i256]* %43, i256* %45)
-//CHECK-NEXT:   %46 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %47 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %48 = load [0 x i256]*, [0 x i256]** %47, align 8
-//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %48, i32 0
-//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %49, i32 0, i256 1
-//CHECK-NEXT:   %51 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %52 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %53 = load [0 x i256]*, [0 x i256]** %52, align 8
-//CHECK-NEXT:   %54 = getelementptr [0 x i256], [0 x i256]* %53, i32 0
-//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %54, i32 0, i256 0
-//CHECK-NEXT:   %56 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %57 = load [0 x i256]*, [0 x i256]** %56, align 8
-//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %57, i32 0
-//CHECK-NEXT:   %59 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
-//CHECK-NEXT:   %60 = bitcast i32* %59 to i256*
-//CHECK-NEXT:   %61 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %62 = load [0 x i256]*, [0 x i256]** %61, align 8
-//CHECK-NEXT:   %63 = getelementptr [0 x i256], [0 x i256]* %62, i32 0
-//CHECK-NEXT:   %64 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
-//CHECK-NEXT:   %65 = bitcast i32* %64 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %46, [0 x i256]* %0, i256* %50, i256* %51, i256* %55, [0 x i256]* %58, i256* %60, [0 x i256]* %63, i256* %65)
-//CHECK-NEXT:   br label %prologue
-//CHECK-EMPTY: 
-//CHECK-NEXT: prologue:
-//CHECK-NEXT:   ret void
-//CHECK-NEXT: }
diff --git a/circom/tests/loops/assign_in_loop_1.circom b/circom/tests/loops/assign_in_loop_1.circom
new file mode 100644
index 000000000..acac850d7
--- /dev/null
+++ b/circom/tests/loops/assign_in_loop_1.circom
@@ -0,0 +1,148 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Inner() {
+    signal input in;
+    signal output out;
+    
+    out <-- in & 1;
+}
+
+template Num2Bits(n) {
+    signal input in;
+    signal output out[n];
+    
+    component c[n];
+    for (var i = 0; i < n; i++) {
+    	c[i] = Inner();
+    	c[i].in <-- in;
+    	out[i] <-- c[i].out;
+    }
+}
+
+component main = Num2Bits(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.{{[0-9]+\.T}}([0 x i256]* %lvars, [0 x i256]* %signals, 
+//CHECK-SAME: i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]], 
+//CHECK-SAME: [0 x i256]* %sub_[[X3]], i256* %subc_[[X3]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1:[0-9]+\.T]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = load i256, i256* %subc_[[X3]], align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 1)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subc_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %4, align 4
+//CHECK-NEXT:   br label %fold_true3
+//CHECK-EMPTY: 
+//CHECK-NEXT: fold_true3:
+//CHECK-NEXT:   call void @llvm.donothing()
+//CHECK-NEXT:   call void @Inner_0_run([0 x i256]* %sub_[[X3]])
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X3]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [3 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Inner_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   %3 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Inner_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   %4 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
+//CHECK-NEXT:   call void @Inner_0_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %5 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %12 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
+//CHECK-NEXT:   %16 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
+//CHECK-NEXT:   %19 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20)
+//CHECK-NEXT:   %21 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %22 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %23 = load [0 x i256]*, [0 x i256]** %22, align 8
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %24, i32 0, i256 1
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %27 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
+//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 0
+//CHECK-NEXT:   %31 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %32 = load [0 x i256]*, [0 x i256]** %31, align 8
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %32, i32 0
+//CHECK-NEXT:   %34 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %35 = bitcast i32* %34 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %21, [0 x i256]* %0, i256* %25, i256* %26, i256* %30, [0 x i256]* %33, i256* %35)
+//CHECK-NEXT:   %36 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %37 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %38 = load [0 x i256]*, [0 x i256]** %37, align 8
+//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0, i256 1
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %42 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
+//CHECK-NEXT:   %45 = getelementptr [0 x i256], [0 x i256]* %44, i32 0, i256 0
+//CHECK-NEXT:   %46 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %47 = load [0 x i256]*, [0 x i256]** %46, align 8
+//CHECK-NEXT:   %48 = getelementptr [0 x i256], [0 x i256]* %47, i32 0
+//CHECK-NEXT:   %49 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %50 = bitcast i32* %49 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %36, [0 x i256]* %0, i256* %40, i256* %41, i256* %45, [0 x i256]* %48, i256* %50)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/assign_in_loop_2.circom b/circom/tests/loops/assign_in_loop_2.circom
new file mode 100644
index 000000000..f6a7caa39
--- /dev/null
+++ b/circom/tests/loops/assign_in_loop_2.circom
@@ -0,0 +1,46 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// References to the Inner subcmp use LocationRule::Mapped because of the 'i' parameter
+template Inner(i) {
+    signal input in;
+    signal output out;
+    
+    out <-- in & i;
+}
+
+template Num2Bits(n) {
+    signal input in;
+    signal output out[n];
+    
+    component c[n];
+    for (var i = 0; i < n; i++) {
+    	c[i] = Inner(i);
+    	c[i].in <-- in;
+    	out[i] <-- c[i].out;
+    }
+}
+
+component main = Num2Bits(3);
+
+//CHECK-LABEL: define void @Inner_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_
+//CHECK-SAME: [[$RUN_1:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_
+//CHECK-SAME: [[$RUN_2:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_
+//CHECK-SAME: [[$RUN_3:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK: call void @Inner_[[$RUN_1]]_run([0 x i256]* %
+//CHECK: call void @Inner_[[$RUN_2]]_run([0 x i256]* %
+//CHECK: call void @Inner_[[$RUN_3]]_run([0 x i256]* %
diff --git a/circom/tests/loops/assign_in_loop_3.circom b/circom/tests/loops/assign_in_loop_3.circom
new file mode 100644
index 000000000..cef469df3
--- /dev/null
+++ b/circom/tests/loops/assign_in_loop_3.circom
@@ -0,0 +1,46 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// References to the Inner subcmp use LocationRule::Mapped because of the 'i' and 'j' parameters
+template Inner(i,j) {
+    signal input in;
+    signal output out;
+    
+    out <-- (in >> i) & j;
+}
+
+template Num2Bits(n) {
+    signal input in;
+    signal output out[n];
+    
+    component c[n];
+    for (var i = 0; i < n; i++) {
+    	c[i] = Inner(i, i+1);
+    	c[i].in <-- in;
+    	out[i] <-- c[i].out;
+    }
+}
+
+component main = Num2Bits(3);
+
+//CHECK-LABEL: define void @Inner_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_
+//CHECK-SAME: [[$RUN_1:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_
+//CHECK-SAME: [[$RUN_2:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_{{[0-9]+}}_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Inner_
+//CHECK-SAME: [[$RUN_3:[0-9]+]]_run([0 x i256]* %0){{.*}} {
+//
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK: call void @Inner_[[$RUN_1]]_run([0 x i256]* %
+//CHECK: call void @Inner_[[$RUN_2]]_run([0 x i256]* %
+//CHECK: call void @Inner_[[$RUN_3]]_run([0 x i256]* %

From 0b5977ebdd5ebe15cd0d34095d2b04c26cd79d2f Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 9 Oct 2023 13:30:40 -0500
Subject: [PATCH 13/22] handle CallBucket return case in mapped_to_indexed

---
 circuit_passes/src/passes/mapped_to_indexed.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 0cec02eb8..999056a84 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -141,7 +141,10 @@ impl InterpreterObserver for MappedToIndexedPass<'_> {
         true
     }
 
-    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
+    fn on_call_bucket(&self, bucket: &CallBucket, env: &Env) -> bool {
+        if let ReturnType::Final(fd) = &bucket.return_info {
+            self.maybe_transform_location(&bucket.id, &fd.dest_address_type, &fd.dest, env);
+        }
         true
     }
 

From 760ba21ea9b43650988f93398092756cd17a6e17 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 18 Oct 2023 15:26:57 -0500
Subject: [PATCH 14/22] Fix the tests (due to rebasing)

---
 circom/tests/loops/assign_in_loop_1.circom | 1 +
 circom/tests/subcmps/mapped.circom         | 1 +
 circom/tests/subcmps/mapped2.circom        | 1 +
 circom/tests/subcmps/mapped3.circom        | 1 +
 circom/tests/subcmps/mapped4.circom        | 1 +
 5 files changed, 5 insertions(+)

diff --git a/circom/tests/loops/assign_in_loop_1.circom b/circom/tests/loops/assign_in_loop_1.circom
index acac850d7..54fba3e15 100644
--- a/circom/tests/loops/assign_in_loop_1.circom
+++ b/circom/tests/loops/assign_in_loop_1.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template Inner() {
     signal input in;
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index 8e16e7307..ed3eab483 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -2,6 +2,7 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index 0e1670a01..cfb12afd3 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -2,6 +2,7 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index e2957eaff..db960d89a 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template ArrayOp(q) {
     signal input inp[15];
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index a0a771958..9e9b00c16 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template MatrixOp(q) {
     signal input inp[5][3];

From 4a75e652494ed475aea6a62308fc4c64450c00dc Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 19 Oct 2023 14:34:03 -0500
Subject: [PATCH 15/22] fix body_extractor assertion failure

---
 .../tests/loops/fixed_idx_in_fixed_idx.circom | 119 ++++++++++++++++++
 .../passes/loop_unroll/loop_env_recorder.rs   |   7 +-
 2 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 circom/tests/loops/fixed_idx_in_fixed_idx.circom

diff --git a/circom/tests/loops/fixed_idx_in_fixed_idx.circom b/circom/tests/loops/fixed_idx_in_fixed_idx.circom
new file mode 100644
index 000000000..6f34c84b3
--- /dev/null
+++ b/circom/tests/loops/fixed_idx_in_fixed_idx.circom
@@ -0,0 +1,119 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// This case initially triggered the "assert!(bucket_to_args.is_empty());" line in body_extractor.rs
+//  because the entire expression 'in[byte_order[i]]' is replaced but the 'byte_order[i]' expression
+//  is also listed in the "bucket_to_args" map as a safe replacement.
+template EmulatedAesencRowShifting() {
+    signal input in[16];
+    signal output out[16];
+    
+    var byte_order[16] = [0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11];
+
+    for (var i = 0; i < 16; i++) {
+        out[i] <== in[byte_order[i]];
+    }
+}
+
+component main = EmulatedAesencRowShifting();
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0, i256* %fix_1){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_1, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 16
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 1)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 16
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @EmulatedAesencRowShifting_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [17 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY:
+//CHECK:      unrolled_loop18:
+//CHECK-NEXT:   %18 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 16
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %18, [0 x i256]* %0, i256* %19, i256* %20)
+//CHECK-NEXT:   %21 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 21
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %21, [0 x i256]* %0, i256* %22, i256* %23)
+//CHECK-NEXT:   %24 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 26
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %24, [0 x i256]* %0, i256* %25, i256* %26)
+//CHECK-NEXT:   %27 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 31
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %27, [0 x i256]* %0, i256* %28, i256* %29)
+//CHECK-NEXT:   %30 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 20
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %30, [0 x i256]* %0, i256* %31, i256* %32)
+//CHECK-NEXT:   %33 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 25
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %33, [0 x i256]* %0, i256* %34, i256* %35)
+//CHECK-NEXT:   %36 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 6
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 30
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %36, [0 x i256]* %0, i256* %37, i256* %38)
+//CHECK-NEXT:   %39 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 7
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 19
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %39, [0 x i256]* %0, i256* %40, i256* %41)
+//CHECK-NEXT:   %42 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 8
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 24
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %42, [0 x i256]* %0, i256* %43, i256* %44)
+//CHECK-NEXT:   %45 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 9
+//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 29
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %45, [0 x i256]* %0, i256* %46, i256* %47)
+//CHECK-NEXT:   %48 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 10
+//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 18
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %48, [0 x i256]* %0, i256* %49, i256* %50)
+//CHECK-NEXT:   %51 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %52 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 11
+//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 23
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %51, [0 x i256]* %0, i256* %52, i256* %53)
+//CHECK-NEXT:   %54 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 12
+//CHECK-NEXT:   %56 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 28
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %54, [0 x i256]* %0, i256* %55, i256* %56)
+//CHECK-NEXT:   %57 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 13
+//CHECK-NEXT:   %59 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 17
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %57, [0 x i256]* %0, i256* %58, i256* %59)
+//CHECK-NEXT:   %60 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %61 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 14
+//CHECK-NEXT:   %62 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 22
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %60, [0 x i256]* %0, i256* %61, i256* %62)
+//CHECK-NEXT:   %63 = bitcast [17 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %64 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 15
+//CHECK-NEXT:   %65 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 27
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %63, [0 x i256]* %0, i256* %64, i256* %65)
+//CHECK-NEXT:   br label %prologue
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index ef51cbb72..7c1dfa81f 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -224,7 +224,12 @@ impl InterpreterObserver for EnvRecorder<'_, '_> {
             todo!(); //not sure if/how to handle that
         }
         self.visit(&bucket.id, &bucket.address_type, &bucket.src, env);
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
+        // For a LoadBucket, there is no need to continue observing inside it and doing
+        //  so can actually cause "assert!(bucket_to_args.is_empty())" to fail. See
+        //  test "loops/fixed_idx_in_fixed_idx.circom" for an example and explanation.
+        //  This is not applicable to other buckets because they have additional content
+        //  inside of them that must be observed.
+        false
     }
 
     fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {

From 31a30f7695564f7df4ca1287097b78d671dd1cfc Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 19 Oct 2023 17:10:41 -0500
Subject: [PATCH 16/22] fix segfault from improper GEP

---
 code_producers/src/llvm_elements/functions.rs       | 10 ++++++++++
 code_producers/src/llvm_elements/mod.rs             | 13 ++++++++++++-
 code_producers/src/llvm_elements/template.rs        |  9 +++++++++
 .../src/intermediate_representation/call_bucket.rs  |  4 +---
 .../src/intermediate_representation/load_bucket.rs  |  8 +-------
 .../src/intermediate_representation/store_bucket.rs |  8 +-------
 6 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 83ab3b1b2..7289f9826 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -219,6 +219,16 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
         }
     }
 
+    fn get_subcmp_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        subcmp_id: AnyValueEnum<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a> {
+        assert_eq!(zero(producer), index);
+        create_gep(producer, self.load_subcmp_addr(producer, subcmp_id), &[index])
+    }
+
     fn get_signal(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index e876cd7de..ac5081851 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -69,6 +69,14 @@ pub trait TemplateCtx<'a> {
         implicit: bool,
     ) -> Option<PointerValue<'a>>;
 
+    /// Returns a pointer to the signal associated to the given subcomponent id and index
+    fn get_subcmp_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        subcmp_id: AnyValueEnum<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a>;
+
     /// Returns a pointer to the signal associated to the index
     fn get_signal(
         &self,
@@ -193,7 +201,10 @@ impl<'a> TopLevelLLVMIRProducer<'a> {
 pub type LLVMAdapter<'a> = &'a Rc<RefCell<LLVM<'a>>>;
 pub type BigIntType<'a> = IntType<'a>; // i256
 
-pub fn new_constraint_with_name<'a>(producer: &dyn LLVMIRProducer<'a>, name: &str) -> AnyValueEnum<'a> {
+pub fn new_constraint_with_name<'a>(
+    producer: &dyn LLVMIRProducer<'a>,
+    name: &str,
+) -> AnyValueEnum<'a> {
     let alloca = create_alloca(producer, bool_type(producer).into(), name);
     let s = producer.context().metadata_string("constraint");
     let kind = producer.context().get_kind_id("constraint");
diff --git a/code_producers/src/llvm_elements/template.rs b/code_producers/src/llvm_elements/template.rs
index 4cfd418e5..66924c435 100644
--- a/code_producers/src/llvm_elements/template.rs
+++ b/code_producers/src/llvm_elements/template.rs
@@ -102,6 +102,15 @@ impl<'a> TemplateCtx<'a> for StdTemplateCtx<'a> {
         )
     }
 
+    fn get_subcmp_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        subcmp_id: AnyValueEnum<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a> {
+        create_gep(producer, self.load_subcmp_addr(producer, subcmp_id), &[zero(producer), index])
+    }
+
     fn get_signal(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
diff --git a/compiler/src/intermediate_representation/call_bucket.rs b/compiler/src/intermediate_representation/call_bucket.rs
index f59b8b3d0..62c8f88fd 100644
--- a/compiler/src/intermediate_representation/call_bucket.rs
+++ b/compiler/src/intermediate_representation/call_bucket.rs
@@ -163,9 +163,7 @@ impl WriteLLVMIR for CallBucket {
                                     let addr = cmp_address.produce_llvm_ir(producer).expect(
                                         "The address of a subcomponent must yield a value!",
                                     );
-                                    let subcmp =
-                                        producer.template_ctx().load_subcmp_addr(producer, addr);
-                                    create_gep(producer, subcmp, &[zero(producer), index])
+                                    producer.template_ctx().get_subcmp_signal(producer, addr, index)
                                 }
                             }
                             .into_pointer_value();
diff --git a/compiler/src/intermediate_representation/load_bucket.rs b/compiler/src/intermediate_representation/load_bucket.rs
index abd714783..bc39ba4e9 100644
--- a/compiler/src/intermediate_representation/load_bucket.rs
+++ b/compiler/src/intermediate_representation/load_bucket.rs
@@ -115,13 +115,7 @@ impl WriteLLVMIR for LoadBucket {
                         if *counter_override {
                             producer.template_ctx().load_subcmp_counter(producer, addr, false).expect("could not find counter!")
                         } else {
-                            let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                            if subcmp.get_type().get_element_type().is_array_type() {
-                                create_gep(producer, subcmp, &[zero(producer), index]).into_pointer_value()
-                            } else {
-                                assert_eq!(zero(producer), index);
-                                create_gep(producer, subcmp, &[index]).into_pointer_value()
-                            }
+                            producer.template_ctx().get_subcmp_signal(producer, addr, index).into_pointer_value()
                         }
                     }
                 };
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 2e76b63fd..2a66d44dd 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -143,13 +143,7 @@ impl StoreBucket {
                         let addr = cmp_address
                             .produce_llvm_ir(producer)
                             .expect("The address of a subcomponent must yield a value!");
-                        let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        if subcmp.get_type().get_element_type().is_array_type() {
-                            create_gep(producer, subcmp, &[zero(producer), dest_index])
-                        } else {
-                            assert_eq!(zero(producer), dest_index);
-                            create_gep(producer, subcmp, &[dest_index])
-                        }
+                        producer.template_ctx().get_subcmp_signal(producer, addr, dest_index)
                     }
                 }
                 .into_pointer_value();

From 0111f3d257223b52be4ebcedc2fbdf8e1dc2412c Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Fri, 20 Oct 2023 09:50:20 -0500
Subject: [PATCH 17/22] use BucketId as key to speed up SimplificationPass

---
 circuit_passes/src/passes/simplification.rs | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 8d83f1d6c..b17159e6b 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -1,8 +1,8 @@
 use std::cell::RefCell;
-use std::collections::BTreeMap;
+use std::collections::HashMap;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
-use compiler::intermediate_representation::{InstructionPointer, new_id};
+use compiler::intermediate_representation::{InstructionPointer, new_id, BucketId};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
@@ -14,9 +14,8 @@ pub struct SimplificationPass<'d> {
     global_data: &'d RefCell<GlobalPassData>,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
-    compute_replacements: RefCell<BTreeMap<ComputeBucket, Value>>,
-    call_replacements: RefCell<BTreeMap<CallBucket, Value>>,
-    //TODO: could use BucketId instead of cloning buckets for keys
+    compute_replacements: RefCell<HashMap<BucketId, Value>>,
+    call_replacements: RefCell<HashMap<BucketId, Value>>,
 }
 
 impl<'d> SimplificationPass<'d> {
@@ -49,7 +48,7 @@ impl InterpreterObserver for SimplificationPass<'_> {
         let (eval, _) = interpreter.execute_compute_bucket(bucket, env, false);
         let eval = eval.expect("Compute bucket must produce a value!");
         if !eval.is_unknown() {
-            self.compute_replacements.borrow_mut().insert(bucket.clone(), eval);
+            self.compute_replacements.borrow_mut().insert(bucket.id, eval);
             return false;
         }
         true
@@ -90,7 +89,7 @@ impl InterpreterObserver for SimplificationPass<'_> {
         if let Some(eval) = eval {
             // Call buckets may not return a value directly
             if !eval.is_unknown() {
-                self.call_replacements.borrow_mut().insert(bucket.clone(), eval);
+                self.call_replacements.borrow_mut().insert(bucket.id, eval);
                 return false;
             }
         }
@@ -132,7 +131,7 @@ impl CircuitTransformationPass for SimplificationPass<'_> {
     }
 
     fn transform_compute_bucket(&self, bucket: &ComputeBucket) -> InstructionPointer {
-        if let Some(value) = self.compute_replacements.borrow().get(&bucket) {
+        if let Some(value) = self.compute_replacements.borrow().get(&bucket.id) {
             return value.to_value_bucket(&self.memory).allocate();
         }
         ComputeBucket {
@@ -148,7 +147,7 @@ impl CircuitTransformationPass for SimplificationPass<'_> {
     }
 
     fn transform_call_bucket(&self, bucket: &CallBucket) -> InstructionPointer {
-        if let Some(value) = self.call_replacements.borrow().get(&bucket) {
+        if let Some(value) = self.call_replacements.borrow().get(&bucket.id) {
             return value.to_value_bucket(&self.memory).allocate();
         }
         CallBucket {

From 80800a9bdd15eed12544641957744e287c24b013 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Fri, 20 Oct 2023 11:48:20 -0500
Subject: [PATCH 18/22] fix function name to reflect actual usage

---
 circuit_passes/src/bucket_interpreter/mod.rs                | 6 +++---
 circuit_passes/src/bucket_interpreter/observer.rs           | 2 +-
 circuit_passes/src/passes/conditional_flattening.rs         | 2 +-
 .../src/passes/deterministic_subcomponent_invocation.rs     | 2 +-
 circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs  | 2 +-
 circuit_passes/src/passes/loop_unroll/mod.rs                | 2 +-
 circuit_passes/src/passes/mapped_to_indexed.rs              | 2 +-
 circuit_passes/src/passes/simplification.rs                 | 2 +-
 circuit_passes/src/passes/unknown_index_sanitization.rs     | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index c3fa1fd1f..bc4823e08 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -416,7 +416,7 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         (computed_value, env)
     }
 
-    fn run_function_loopbody<'env>(
+    fn run_function_extracted<'env>(
         &self,
         bucket: &'env CallBucket,
         env: Env<'env>,
@@ -443,7 +443,7 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         //NOTE: Do not change scope for the new interpreter because the mem lookups within
         //  `get_write_operations_in_store_bucket` need to use the original function context.
         let interp = self.mem.build_interpreter(self.global_data, self.observer);
-        let observe = observe && !interp.observer.ignore_loopbody_function_calls();
+        let observe = observe && !interp.observer.ignore_extracted_function_calls();
         let instructions = &env.get_function(name).body;
         unsafe {
             let ptr = instructions.as_ptr();
@@ -487,7 +487,7 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
             // The extracted loop body and array parameter functions can change any values in
             //  the environment via the parameters passed to it. So interpret the function and
             //  keep the resulting Env (as if the function had executed inline).
-            self.run_function_loopbody(&bucket, env, observe)
+            self.run_function_extracted(&bucket, env, observe)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
diff --git a/circuit_passes/src/bucket_interpreter/observer.rs b/circuit_passes/src/bucket_interpreter/observer.rs
index f233055f6..f21186e52 100644
--- a/circuit_passes/src/bucket_interpreter/observer.rs
+++ b/circuit_passes/src/bucket_interpreter/observer.rs
@@ -46,5 +46,5 @@ pub trait InterpreterObserver {
 
     fn ignore_function_calls(&self) -> bool;
     fn ignore_subcmp_calls(&self) -> bool;
-    fn ignore_loopbody_function_calls(&self) -> bool;
+    fn ignore_extracted_function_calls(&self) -> bool;
 }
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 6a804b1be..15c537e75 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -158,7 +158,7 @@ impl InterpreterObserver for ConditionalFlatteningPass<'_> {
         true
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         false
     }
 }
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 1c16277f5..7a9f5e985 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -124,7 +124,7 @@ impl InterpreterObserver for DeterministicSubCmpInvokePass<'_> {
         true
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         false
     }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 7c1dfa81f..9498db412 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -303,7 +303,7 @@ impl InterpreterObserver for EnvRecorder<'_, '_> {
         true
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         true
     }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index dac33497a..48c261413 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -204,7 +204,7 @@ impl InterpreterObserver for LoopUnrollPass<'_> {
         true
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         true
     }
 }
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 999056a84..8caa709f1 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -168,7 +168,7 @@ impl InterpreterObserver for MappedToIndexedPass<'_> {
         true
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         false
     }
 }
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index b17159e6b..985342844 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -116,7 +116,7 @@ impl InterpreterObserver for SimplificationPass<'_> {
         true
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         true
     }
 }
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index 6cce4821f..a8b419777 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -253,7 +253,7 @@ impl InterpreterObserver for UnknownIndexSanitizationPass<'_> {
         false
     }
 
-    fn ignore_loopbody_function_calls(&self) -> bool {
+    fn ignore_extracted_function_calls(&self) -> bool {
         true
     }
 }

From ecfdc589219647270ee66ffa08f1a8c5a3edeb53 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Fri, 20 Oct 2023 16:41:24 -0500
Subject: [PATCH 19/22] Remove functions that cannot be reached from any
 template

---
 circom/src/compilation_user.rs                |   1 +
 .../tests/zzz/unreachable_code_crash.circom   |  41 +++++
 .../src/bucket_interpreter/memory.rs          |   8 +-
 circuit_passes/src/bucket_interpreter/mod.rs  |   7 +-
 .../bucket_interpreter/observed_visitor.rs    | 145 +++++++++++++++
 .../src/bucket_interpreter/observer.rs        |  78 ++++----
 .../src/passes/conditional_flattening.rs      |   4 +-
 .../deterministic_subcomponent_invocation.rs  |   4 +-
 .../passes/loop_unroll/loop_env_recorder.rs   |   4 +-
 circuit_passes/src/passes/loop_unroll/mod.rs  |   4 +-
 .../src/passes/mapped_to_indexed.rs           |   4 +-
 circuit_passes/src/passes/mod.rs              |  11 ++
 circuit_passes/src/passes/simplification.rs   |   4 +-
 .../src/passes/unknown_index_sanitization.rs  |   4 +-
 .../src/passes/unused_func_removal.rs         | 167 ++++++++++++++++++
 code_producers/src/llvm_elements/fr.rs        |  15 ++
 code_producers/src/llvm_elements/functions.rs |   2 +-
 17 files changed, 446 insertions(+), 57 deletions(-)
 create mode 100644 circom/tests/zzz/unreachable_code_crash.circom
 create mode 100644 circuit_passes/src/bucket_interpreter/observed_visitor.rs
 create mode 100644 circuit_passes/src/passes/unused_func_removal.rs

diff --git a/circom/src/compilation_user.rs b/circom/src/compilation_user.rs
index e802ebc0f..e27be8cda 100644
--- a/circom/src/compilation_user.rs
+++ b/circom/src/compilation_user.rs
@@ -65,6 +65,7 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
             .schedule_const_arg_deduplication_pass()
             .schedule_loop_unroll_pass()
             .schedule_conditional_flattening_pass()
+            .schedule_unused_function_removal_pass() //previous 2 passes create the dead functions
             .schedule_mapped_to_indexed_pass()
             .schedule_unknown_index_sanitization_pass()
             .schedule_simplification_pass()
diff --git a/circom/tests/zzz/unreachable_code_crash.circom b/circom/tests/zzz/unreachable_code_crash.circom
new file mode 100644
index 000000000..e63e9810e
--- /dev/null
+++ b/circom/tests/zzz/unreachable_code_crash.circom
@@ -0,0 +1,41 @@
+pragma circom 2.0.2;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+
+template OR() {
+    signal input a;
+}
+
+template InvalidArgIndex(n, k) {
+    component has_prev_non_zero[k * n];
+    for (var i = k - 1; i >= 0; i--) {
+        for (var j = n - 1; j >= 0; j--) {
+            has_prev_non_zero[n * i + j] = OR();
+            if (i == k - 1 && j == n - 1) {
+                // The StoreBucket here causes a crash in `get_arg_ptr`.
+                // Here's what happens: the outer loop unrolls first, in 2 iterations. In the second
+                //  iteration, this branch of the if-else never executes, so it is dead code in the
+                //  generated "loop.body" function. Thus, no parameter was added to the function to
+                //  reference the destination of this StoreBucket and the location information was
+                //  not updated. That leaves an invalid parameter reference, which causes
+                //  'functions.rs::get_arg_ptr' to crash even though it is only in dead code.
+                has_prev_non_zero[n * i + j].a <-- 99;
+            } else {
+                has_prev_non_zero[n * i + j].a <-- 33;
+            }
+        }
+    }
+}
+
+component main = InvalidArgIndex(3, 2);
+
+//// Check that only the proper versions of the generated functions remain
+//// (i.e. the initial one was removed after conditional flattening).
+//
+//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
+//CHECK:     define void @..generated..loop.body.[[[0-9]+]].F(
+//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
+//CHECK:     define void @..generated..loop.body.[[[0-9]+]].T(
+//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
+//CHECK:     define void @..generated..loop.body.[[[0-9]+]].F.T(
+//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
diff --git a/circuit_passes/src/bucket_interpreter/memory.rs b/circuit_passes/src/bucket_interpreter/memory.rs
index 368474491..f16fc904c 100644
--- a/circuit_passes/src/bucket_interpreter/memory.rs
+++ b/circuit_passes/src/bucket_interpreter/memory.rs
@@ -8,7 +8,7 @@ use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::env::{Env, LibraryAccess};
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use crate::passes::GlobalPassData;
 
 pub struct PassMemory {
@@ -44,7 +44,7 @@ impl PassMemory {
     pub fn build_interpreter<'a, 'd: 'a>(
         &'a self,
         global_data: &'d RefCell<GlobalPassData>,
-        observer: &'a dyn InterpreterObserver,
+        observer: &'a dyn for<'e> Observer<Env<'e>>,
     ) -> BucketInterpreter {
         self.build_interpreter_with_scope(
             global_data,
@@ -56,7 +56,7 @@ impl PassMemory {
     pub fn build_interpreter_with_scope<'a, 'd: 'a>(
         &'a self,
         global_data: &'d RefCell<GlobalPassData>,
-        observer: &'a dyn InterpreterObserver,
+        observer: &'a dyn for<'e> Observer<Env<'e>>,
         scope: String,
     ) -> BucketInterpreter {
         BucketInterpreter::init(global_data, observer, self, scope)
@@ -69,7 +69,7 @@ impl PassMemory {
     pub fn run_template<'d>(
         &self,
         global_data: &'d RefCell<GlobalPassData>,
-        observer: &dyn InterpreterObserver,
+        observer: &dyn for<'e> Observer<Env<'e>>,
         template: &TemplateCode,
     ) {
         assert!(!self.current_scope.borrow().is_empty());
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index bc4823e08..2f5966a4e 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -2,6 +2,7 @@ pub mod value;
 pub mod env;
 pub mod memory;
 pub mod observer;
+pub mod observed_visitor;
 pub(crate) mod operations;
 
 use std::cell::RefCell;
@@ -12,7 +13,7 @@ use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer};
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::num_bigint::BigInt;
-use observer::InterpreterObserver;
+use observer::Observer;
 use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
@@ -24,7 +25,7 @@ use self::env::LibraryAccess;
 
 pub struct BucketInterpreter<'a, 'd> {
     global_data: &'d RefCell<GlobalPassData>,
-    observer: &'a dyn InterpreterObserver,
+    observer: &'a dyn for<'e> Observer<Env<'e>>,
     mem: &'a PassMemory,
     scope: String,
     p: BigInt,
@@ -35,7 +36,7 @@ pub type R<'a> = (Option<Value>, Env<'a>);
 impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
     pub fn init(
         global_data: &'d RefCell<GlobalPassData>,
-        observer: &'a dyn InterpreterObserver,
+        observer: &'a dyn for<'e> Observer<Env<'e>>,
         mem: &'a PassMemory,
         scope: String,
     ) -> Self {
diff --git a/circuit_passes/src/bucket_interpreter/observed_visitor.rs b/circuit_passes/src/bucket_interpreter/observed_visitor.rs
new file mode 100644
index 000000000..57bae3a9a
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/observed_visitor.rs
@@ -0,0 +1,180 @@
+use code_producers::llvm_elements::fr::BUILT_IN_NAMES;
+use compiler::intermediate_representation::InstructionPointer;
+use compiler::intermediate_representation::ir_interface::*;
+use super::env::LibraryAccess;
+use super::observer::Observer;
+
+/// Walks Circom IR instructions without interpreting them, notifying an [`Observer`]
+/// of each instruction reached and handing it the caller-supplied state `S`.
+pub struct ObservedVisitor<'a, S> {
+    /// Notified, via `on_instruction`, of each instruction visited while observing.
+    observer: &'a dyn Observer<S>,
+    /// When present, the bodies of called functions are visited too.
+    libs: Option<&'a dyn LibraryAccess>,
+    /// Names of the functions whose bodies are currently being visited, used to
+    /// keep a (mutually) recursive function from being re-entered endlessly.
+    active_calls: std::cell::RefCell<std::collections::HashSet<String>>,
+}
+
+impl<'a, S> ObservedVisitor<'a, S> {
+    /// Creates a visitor reporting to `observer`. When `libs` is given, the body of
+    /// every called function that is not a built-in is visited as well.
+    pub fn new(observer: &'a dyn Observer<S>, libs: Option<&'a dyn LibraryAccess>) -> Self {
+        ObservedVisitor { observer, libs, active_calls: Default::default() }
+    }
+
+    /// Visits the subcomponent address instruction of a `SubcmpSignal` address;
+    /// any other address type has nothing to visit.
+    pub fn visit_address_type(&self, addr_type: &AddressType, state: &S, observe: bool) {
+        if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
+            self.visit_instruction(cmp_address, state, observe);
+        }
+    }
+
+    /// Visits the instruction(s) that give the location of a `LocationRule`.
+    pub fn visit_location_rule(&self, location_rule: &LocationRule, state: &S, observe: bool) {
+        match location_rule {
+            LocationRule::Indexed { location, .. } => {
+                self.visit_instruction(location, state, observe);
+            }
+            LocationRule::Mapped { indexes, .. } => {
+                self.visit_instructions(indexes, state, observe)
+            }
+        }
+    }
+
+    /// Visits the address type and then the source location of a load.
+    pub fn visit_load_bucket(&self, bucket: &LoadBucket, state: &S, observe: bool) {
+        self.visit_address_type(&bucket.address_type, state, observe);
+        self.visit_location_rule(&bucket.src, state, observe);
+    }
+
+    /// Visits the source, then the destination address type and location, of a store.
+    pub fn visit_store_bucket(&self, bucket: &StoreBucket, state: &S, observe: bool) {
+        self.visit_instruction(&bucket.src, state, observe);
+        self.visit_address_type(&bucket.dest_address_type, state, observe);
+        self.visit_location_rule(&bucket.dest, state, observe);
+    }
+
+    /// Visits the arguments and the final destination (if any) of a call and then,
+    /// when `LibraryAccess` was provided, the body of the callee.
+    pub fn visit_call_bucket(&self, bucket: &CallBucket, state: &S, observe: bool) {
+        self.visit_instructions(&bucket.arguments, state, observe);
+        if let ReturnType::Final(fd) = &bucket.return_info {
+            self.visit_address_type(&fd.dest_address_type, state, observe);
+            self.visit_location_rule(&fd.dest, state, observe);
+        }
+        // Visit the callee function body if LibraryAccess was provided
+        if let Some(libs) = self.libs {
+            let name = &bucket.symbol;
+            // Skip those that cannot be visited (i.e. not yet in Circuit.functions)
+            // and those already being visited further up the call chain, otherwise
+            // the visit of a recursive function would never terminate.
+            if !BUILT_IN_NAMES.with(|f| f.contains(name.as_str()))
+                && self.active_calls.borrow_mut().insert(name.clone())
+            {
+                self.visit_instructions(
+                    &libs.get_function(name).body,
+                    state,
+                    observe && !self.observer.ignore_call(name),
+                );
+                self.active_calls.borrow_mut().remove(name);
+            }
+        }
+    }
+
+    /// Visits every instruction on the stack of a compute bucket.
+    pub fn visit_compute_bucket(&self, bucket: &ComputeBucket, state: &S, observe: bool) {
+        self.visit_instructions(&bucket.stack, state, observe);
+    }
+
+    /// Visits the instruction evaluated by an assert.
+    pub fn visit_assert_bucket(&self, bucket: &AssertBucket, state: &S, observe: bool) {
+        self.visit_instruction(&bucket.evaluate, state, observe);
+    }
+
+    /// Visits the continue condition and then the body of a loop, each exactly once.
+    pub fn visit_loop_bucket(&self, bucket: &LoopBucket, state: &S, observe: bool) {
+        self.visit_instruction(&bucket.continue_condition, state, observe);
+        self.visit_instructions(&bucket.body, state, observe);
+    }
+
+    /// Visits the subcomponent id instruction of a component creation.
+    pub fn visit_create_cmp_bucket(&self, bucket: &CreateCmpBucket, state: &S, observe: bool) {
+        self.visit_instruction(&bucket.sub_cmp_id, state, observe);
+    }
+
+    /// Visits the instruction wrapped by a constraint, whichever variant it is.
+    pub fn visit_constraint_bucket(&self, bucket: &ConstraintBucket, state: &S, observe: bool) {
+        self.visit_instruction(
+            match bucket {
+                ConstraintBucket::Substitution(i) => i,
+                ConstraintBucket::Equality(i) => i,
+            },
+            state,
+            observe,
+        );
+    }
+
+    /// Visits every instruction in the body of a block.
+    pub fn visit_block_bucket(&self, bucket: &BlockBucket, state: &S, observe: bool) {
+        self.visit_instructions(&bucket.body, state, observe);
+    }
+
+    /// Visits the condition and then both the `if` and the `else` branch.
+    pub fn visit_branch_bucket(&self, bucket: &BranchBucket, state: &S, observe: bool) {
+        self.visit_instruction(&bucket.cond, state, observe);
+        self.visit_instructions(&bucket.if_branch, state, observe);
+        self.visit_instructions(&bucket.else_branch, state, observe);
+    }
+
+    /// Visits the instruction that produces the returned value.
+    pub fn visit_return_bucket(&self, bucket: &ReturnBucket, state: &S, observe: bool) {
+        self.visit_instruction(&bucket.value, state, observe);
+    }
+
+    /// Visits the expression arguments of a log; other kinds of argument are skipped.
+    pub fn visit_log_bucket(&self, bucket: &LogBucket, state: &S, observe: bool) {
+        for arg in &bucket.argsprint {
+            if let LogBucketArg::LogExp(i) = arg {
+                self.visit_instruction(i, state, observe);
+            }
+        }
+    }
+
+    /// Does nothing: no nested instruction of a value bucket is visited.
+    pub fn visit_value_bucket(&self, _bucket: &ValueBucket, _state: &S, _observe: bool) {}
+
+    /// Does nothing: no nested instruction of a nop bucket is visited.
+    pub fn visit_nop_bucket(&self, _bucket: &NopBucket, _state: &S, _observe: bool) {}
+
+    /// Visits each of the given instructions in order.
+    pub fn visit_instructions(&self, insts: &Vec<InstructionPointer>, state: &S, observe: bool) {
+        for i in insts {
+            self.visit_instruction(i, state, observe);
+        }
+    }
+
+    /// Notifies the observer of `inst` if `observe` is true, then visits what is nested
+    /// in `inst`; the nested parts are observed only if the observer returned true.
+    pub fn visit_instruction(&self, inst: &InstructionPointer, state: &S, observe: bool) {
+        let keep_observing =
+            if observe { self.observer.on_instruction(inst, state) } else { observe };
+        match inst.as_ref() {
+            Instruction::Value(b) => self.visit_value_bucket(b, state, keep_observing),
+            Instruction::Load(b) => self.visit_load_bucket(b, state, keep_observing),
+            Instruction::Store(b) => self.visit_store_bucket(b, state, keep_observing),
+            Instruction::Compute(b) => self.visit_compute_bucket(b, state, keep_observing),
+            Instruction::Call(b) => self.visit_call_bucket(b, state, keep_observing),
+            Instruction::Branch(b) => self.visit_branch_bucket(b, state, keep_observing),
+            Instruction::Return(b) => self.visit_return_bucket(b, state, keep_observing),
+            Instruction::Assert(b) => self.visit_assert_bucket(b, state, keep_observing),
+            Instruction::Log(b) => self.visit_log_bucket(b, state, keep_observing),
+            Instruction::Loop(b) => self.visit_loop_bucket(b, state, keep_observing),
+            Instruction::CreateCmp(b) => self.visit_create_cmp_bucket(b, state, keep_observing),
+            Instruction::Constraint(b) => self.visit_constraint_bucket(b, state, keep_observing),
+            Instruction::Block(b) => self.visit_block_bucket(b, state, keep_observing),
+            Instruction::Nop(b) => self.visit_nop_bucket(b, state, keep_observing),
+        }
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/observer.rs b/circuit_passes/src/bucket_interpreter/observer.rs
index f21186e52..cd8b41f38 100644
--- a/circuit_passes/src/bucket_interpreter/observer.rs
+++ b/circuit_passes/src/bucket_interpreter/observer.rs
@@ -1,50 +1,58 @@
+use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
 use compiler::intermediate_representation::{Instruction, InstructionPointer};
 use compiler::intermediate_representation::ir_interface::{
     AssertBucket, BranchBucket, CallBucket, ComputeBucket, ConstraintBucket, CreateCmpBucket,
     LoadBucket, LocationRule, LogBucket, LoopBucket, NopBucket, ReturnBucket, StoreBucket,
     BlockBucket, ValueBucket,
 };
-use crate::bucket_interpreter::env::Env;
 
-/// Will get called everytime we are about to execute a bucket, with access to the environment
-/// prior to the execution of the bucket
-pub trait InterpreterObserver {
-    fn on_value_bucket(&self, bucket: &ValueBucket, env: &Env) -> bool;
-    fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool;
-    fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool;
-    fn on_compute_bucket(&self, bucket: &ComputeBucket, env: &Env) -> bool;
-    fn on_assert_bucket(&self, bucket: &AssertBucket, env: &Env) -> bool;
-    fn on_loop_bucket(&self, bucket: &LoopBucket, env: &Env) -> bool;
-    fn on_create_cmp_bucket(&self, bucket: &CreateCmpBucket, env: &Env) -> bool;
-    fn on_constraint_bucket(&self, bucket: &ConstraintBucket, env: &Env) -> bool;
-    fn on_block_bucket(&self, bucket: &BlockBucket, env: &Env) -> bool;
-    fn on_nop_bucket(&self, bucket: &NopBucket, env: &Env) -> bool;
-    fn on_location_rule(&self, location_rule: &LocationRule, env: &Env) -> bool;
-    fn on_call_bucket(&self, bucket: &CallBucket, env: &Env) -> bool;
-    fn on_branch_bucket(&self, bucket: &BranchBucket, env: &Env) -> bool;
-    fn on_return_bucket(&self, bucket: &ReturnBucket, env: &Env) -> bool;
-    fn on_log_bucket(&self, bucket: &LogBucket, env: &Env) -> bool;
+/// Will get called every time some visitor is about to visit a bucket,
+/// with access to the state data prior to the execution of the bucket.
+pub trait Observer<S> {
+    fn on_value_bucket(&self, bucket: &ValueBucket, state: &S) -> bool;
+    fn on_load_bucket(&self, bucket: &LoadBucket, state: &S) -> bool;
+    fn on_store_bucket(&self, bucket: &StoreBucket, state: &S) -> bool;
+    fn on_compute_bucket(&self, bucket: &ComputeBucket, state: &S) -> bool;
+    fn on_assert_bucket(&self, bucket: &AssertBucket, state: &S) -> bool;
+    fn on_loop_bucket(&self, bucket: &LoopBucket, state: &S) -> bool;
+    fn on_create_cmp_bucket(&self, bucket: &CreateCmpBucket, state: &S) -> bool;
+    fn on_constraint_bucket(&self, bucket: &ConstraintBucket, state: &S) -> bool;
+    fn on_block_bucket(&self, bucket: &BlockBucket, state: &S) -> bool;
+    fn on_nop_bucket(&self, bucket: &NopBucket, state: &S) -> bool;
+    fn on_location_rule(&self, location_rule: &LocationRule, state: &S) -> bool;
+    fn on_call_bucket(&self, bucket: &CallBucket, state: &S) -> bool;
+    fn on_branch_bucket(&self, bucket: &BranchBucket, state: &S) -> bool;
+    fn on_return_bucket(&self, bucket: &ReturnBucket, state: &S) -> bool;
+    fn on_log_bucket(&self, bucket: &LogBucket, state: &S) -> bool;
 
-    fn on_instruction(&self, inst: &InstructionPointer, env: &Env) -> bool {
+    fn on_instruction(&self, inst: &InstructionPointer, state: &S) -> bool {
         match inst.as_ref() {
-            Instruction::Value(bucket) => self.on_value_bucket(bucket, env),
-            Instruction::Load(bucket) => self.on_load_bucket(bucket, env),
-            Instruction::Store(bucket) => self.on_store_bucket(bucket, env),
-            Instruction::Compute(bucket) => self.on_compute_bucket(bucket, env),
-            Instruction::Call(bucket) => self.on_call_bucket(bucket, env),
-            Instruction::Branch(bucket) => self.on_branch_bucket(bucket, env),
-            Instruction::Return(bucket) => self.on_return_bucket(bucket, env),
-            Instruction::Assert(bucket) => self.on_assert_bucket(bucket, env),
-            Instruction::Log(bucket) => self.on_log_bucket(bucket, env),
-            Instruction::Loop(bucket) => self.on_loop_bucket(bucket, env),
-            Instruction::CreateCmp(bucket) => self.on_create_cmp_bucket(bucket, env),
-            Instruction::Constraint(bucket) => self.on_constraint_bucket(bucket, env),
-            Instruction::Block(bucket) => self.on_block_bucket(bucket, env),
-            Instruction::Nop(bucket) => self.on_nop_bucket(bucket, env),
+            Instruction::Value(bucket) => self.on_value_bucket(bucket, state),
+            Instruction::Load(bucket) => self.on_load_bucket(bucket, state),
+            Instruction::Store(bucket) => self.on_store_bucket(bucket, state),
+            Instruction::Compute(bucket) => self.on_compute_bucket(bucket, state),
+            Instruction::Call(bucket) => self.on_call_bucket(bucket, state),
+            Instruction::Branch(bucket) => self.on_branch_bucket(bucket, state),
+            Instruction::Return(bucket) => self.on_return_bucket(bucket, state),
+            Instruction::Assert(bucket) => self.on_assert_bucket(bucket, state),
+            Instruction::Log(bucket) => self.on_log_bucket(bucket, state),
+            Instruction::Loop(bucket) => self.on_loop_bucket(bucket, state),
+            Instruction::CreateCmp(bucket) => self.on_create_cmp_bucket(bucket, state),
+            Instruction::Constraint(bucket) => self.on_constraint_bucket(bucket, state),
+            Instruction::Block(bucket) => self.on_block_bucket(bucket, state),
+            Instruction::Nop(bucket) => self.on_nop_bucket(bucket, state),
         }
     }
 
-    fn ignore_function_calls(&self) -> bool;
     fn ignore_subcmp_calls(&self) -> bool;
+    fn ignore_function_calls(&self) -> bool;
     fn ignore_extracted_function_calls(&self) -> bool;
+
+    fn ignore_call(&self, callee: &String) -> bool {
+        if callee.starts_with(GENERATED_FN_PREFIX) {
+            self.ignore_extracted_function_calls()
+        } else {
+            self.ignore_function_calls()
+        }
+    }
 }
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 15c537e75..80b2555a5 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -8,7 +8,7 @@ use compiler::intermediate_representation::ir_interface::*;
 use indexmap::{IndexMap, IndexSet};
 use crate::bucket_interpreter::env::{Env, LibraryAccess};
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use super::{CircuitTransformationPass, GlobalPassData};
 
 type BranchValues = BTreeMap<BucketId, Option<bool>>;
@@ -59,7 +59,7 @@ impl<'d> ConditionalFlatteningPass<'d> {
     }
 }
 
-impl InterpreterObserver for ConditionalFlatteningPass<'_> {
+impl Observer<Env<'_>> for ConditionalFlatteningPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 7a9f5e985..ecf09d591 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -6,7 +6,7 @@ use compiler::intermediate_representation::ir_interface::*;
 use compiler::intermediate_representation::ir_interface::StatusInput::{Last, NoLast};
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use super::{CircuitTransformationPass, GlobalPassData};
 
 pub struct DeterministicSubCmpInvokePass<'d> {
@@ -48,7 +48,7 @@ impl<'d> DeterministicSubCmpInvokePass<'d> {
     }
 }
 
-impl InterpreterObserver for DeterministicSubCmpInvokePass<'_> {
+impl Observer<Env<'_>> for DeterministicSubCmpInvokePass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 9498db412..cafb9bf1a 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -6,7 +6,7 @@ use compiler::intermediate_representation::BucketId;
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::GlobalPassData;
 use super::DEBUG_LOOP_UNROLL;
@@ -218,7 +218,7 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
     }
 }
 
-impl InterpreterObserver for EnvRecorder<'_, '_> {
+impl Observer<Env<'_>> for EnvRecorder<'_, '_> {
     fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
         if let Some(_) = bucket.bounded_fn {
             todo!(); //not sure if/how to handle that
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index 48c261413..6121b6ba5 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -14,7 +14,7 @@ use compiler::intermediate_representation::{
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
 use super::{CircuitTransformationPass, GlobalPassData};
 use self::body_extractor::LoopBodyExtractor;
@@ -122,7 +122,7 @@ impl<'d> LoopUnrollPass<'d> {
     }
 }
 
-impl InterpreterObserver for LoopUnrollPass<'_> {
+impl Observer<Env<'_>> for LoopUnrollPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 8caa709f1..04739ad07 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -6,7 +6,7 @@ use compiler::intermediate_representation::{ir_interface::*, BucketId};
 use compiler::intermediate_representation::{InstructionPointer, UpdateId};
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::Value::KnownU32;
 use super::{CircuitTransformationPass, GlobalPassData};
@@ -94,7 +94,7 @@ impl<'d> MappedToIndexedPass<'d> {
     }
 }
 
-impl InterpreterObserver for MappedToIndexedPass<'_> {
+impl Observer<Env<'_>> for MappedToIndexedPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index f4440e29f..5d9adc1bf 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -14,6 +14,7 @@ use crate::passes::{
     deterministic_subcomponent_invocation::DeterministicSubCmpInvokePass,
     loop_unroll::LoopUnrollPass, mapped_to_indexed::MappedToIndexedPass,
     simplification::SimplificationPass, unknown_index_sanitization::UnknownIndexSanitizationPass,
+    unused_func_removal::UnusedFuncRemovalPass,
 };
 
 use self::loop_unroll::body_extractor::{UnrolledIterLvars, ToOriginalLocation, FuncArgIdx};
@@ -22,6 +23,7 @@ mod const_arg_deduplication;
 mod conditional_flattening;
 mod simplification;
 mod deterministic_subcomponent_invocation;
+mod unused_func_removal;
 mod mapped_to_indexed;
 mod unknown_index_sanitization;
 mod checks;
@@ -425,6 +427,7 @@ pub enum PassKind {
     LoopUnroll,
     Simplification,
     ConditionalFlattening,
+    UnusedFunctionRemoval,
     DeterministicSubCmpInvoke,
     MappedToIndexed,
     UnknownIndexSanitization,
@@ -496,6 +499,11 @@ impl PassManager {
         self
     }
 
+    pub fn schedule_unused_function_removal_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::UnusedFunctionRemoval);
+        self
+    }
+
     pub fn schedule_mapped_to_indexed_pass(&self) -> &Self {
         self.passes.borrow_mut().push(PassKind::MappedToIndexed);
         self
@@ -525,6 +533,9 @@ impl PassManager {
             PassKind::DeterministicSubCmpInvoke => {
                 Box::new(DeterministicSubCmpInvokePass::new(prime.clone(), global_data))
             }
+            PassKind::UnusedFunctionRemoval => {
+                Box::new(UnusedFuncRemovalPass::new(prime.clone(), global_data))
+            }
             PassKind::MappedToIndexed => {
                 Box::new(MappedToIndexedPass::new(prime.clone(), global_data))
             }
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 985342844..b866304ac 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -6,7 +6,7 @@ use compiler::intermediate_representation::{InstructionPointer, new_id, BucketId
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use crate::bucket_interpreter::value::Value;
 use super::{CircuitTransformationPass, GlobalPassData};
 
@@ -29,7 +29,7 @@ impl<'d> SimplificationPass<'d> {
     }
 }
 
-impl InterpreterObserver for SimplificationPass<'_> {
+impl Observer<Env<'_>> for SimplificationPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index a8b419777..d88e5d18e 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -10,7 +10,7 @@ use code_producers::llvm_elements::array_switch::{get_array_load_name, get_array
 use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
-use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::observer::Observer;
 use crate::bucket_interpreter::operations::compute_operation;
 use crate::bucket_interpreter::R;
 use crate::bucket_interpreter::value::Value::{KnownU32, KnownBigInt};
@@ -170,7 +170,7 @@ impl<'d> UnknownIndexSanitizationPass<'d> {
  * - loads with a function call that returns the loaded value
  * - stores with a function call that performs the store
  */
-impl InterpreterObserver for UnknownIndexSanitizationPass<'_> {
+impl Observer<Env<'_>> for UnknownIndexSanitizationPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
diff --git a/circuit_passes/src/passes/unused_func_removal.rs b/circuit_passes/src/passes/unused_func_removal.rs
new file mode 100644
index 000000000..54821052b
--- /dev/null
+++ b/circuit_passes/src/passes/unused_func_removal.rs
@@ -0,0 +1,167 @@
+use std::cell::{RefCell, Ref};
+use std::collections::{HashSet, HashMap};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use compiler::compiler_interface::Circuit;
+use compiler::intermediate_representation::ir_interface::*;
+use crate::bucket_interpreter::{observer::Observer, env::LibraryAccess};
+use crate::bucket_interpreter::observed_visitor::ObservedVisitor;
+use super::{CircuitTransformationPass, GlobalPassData};
+
+/// The goal of this pass is to remove unreachable functions from the Circuit
+pub struct UnusedFuncRemovalPass<'d> {
+    _global_data: &'d RefCell<GlobalPassData>,
+    // Wrapped in a RefCell because the Observer callbacks only get `&self` but must record the names they see
+    used_functions: RefCell<HashSet<String>>,
+}
+
+impl<'d> UnusedFuncRemovalPass<'d> {
+    pub fn new(_prime: String, _global_data: &'d RefCell<GlobalPassData>) -> Self {
+        UnusedFuncRemovalPass { _global_data, used_functions: Default::default() }
+    }
+}
+
+impl Observer<()> for UnusedFuncRemovalPass<'_> {
+    fn on_value_bucket(&self, _bucket: &ValueBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_load_bucket(&self, _bucket: &LoadBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_store_bucket(&self, _bucket: &StoreBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_assert_bucket(&self, _bucket: &AssertBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_loop_bucket(&self, _bucket: &LoopBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_block_bucket(&self, _bucket: &BlockBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_nop_bucket(&self, _bucket: &NopBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_location_rule(&self, _location_rule: &LocationRule, _: &()) -> bool {
+        true
+    }
+
+    fn on_call_bucket(&self, bucket: &CallBucket, _: &()) -> bool {
+        self.used_functions.borrow_mut().insert(bucket.symbol.clone());
+        true
+    }
+
+    fn on_branch_bucket(&self, _bucket: &BranchBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_return_bucket(&self, _bucket: &ReturnBucket, _: &()) -> bool {
+        true
+    }
+
+    fn on_log_bucket(&self, _bucket: &LogBucket, _: &()) -> bool {
+        true
+    }
+
+    fn ignore_function_calls(&self) -> bool {
+        false
+    }
+
+    fn ignore_subcmp_calls(&self) -> bool {
+        false
+    }
+
+    fn ignore_extracted_function_calls(&self) -> bool {
+        false
+    }
+}
+
+impl CircuitTransformationPass for UnusedFuncRemovalPass<'_> {
+    fn name(&self) -> &str {
+        "UnusedFuncRemovalPass"
+    }
+
+    fn get_updated_field_constants(&self) -> Vec<String> {
+        unreachable!()
+    }
+
+    /// Returns a copy of `circuit` in which only the functions called, directly or
+    /// through other functions, from the body of some template are retained.
+    fn transform_circuit(&self, circuit: &Circuit) -> Circuit {
+        // Lookup table that lets the visitor reach the body of a called function.
+        // Every function is cloned into a RefCell because `LibraryAccess` hands
+        // out `Ref`s rather than plain references.
+        struct FunctionTable {
+            by_name: HashMap<String, RefCell<FunctionCode>>,
+        }
+
+        impl LibraryAccess for FunctionTable {
+            fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+                // Indexing panics if `name` is not the header of a circuit function
+                self.by_name[name].borrow()
+            }
+
+            fn get_template(&self, _name: &String) -> Ref<TemplateCode> {
+                // Templates are never looked up while searching for function calls
+                unreachable!()
+            }
+        }
+
+        // Index a clone of every function of the circuit by its header
+        let table = FunctionTable {
+            by_name: circuit
+                .functions
+                .iter()
+                .map(|func| (func.header.clone(), RefCell::new(FunctionCode::clone(func))))
+                .collect(),
+        };
+
+        // Visit the body of each template and, through the table, the body of
+        // each function called from there; `on_call_bucket` records the name
+        // of the callee of every CallBucket that is reached.
+        let visitor = ObservedVisitor::new(self, Some(&table));
+        for template in &circuit.templates {
+            visitor.visit_instructions(&template.body, &(), true);
+        }
+
+        // Keep only the functions whose name was recorded during the visit
+        let used = self.used_functions.borrow();
+        let functions = circuit
+            .functions
+            .iter()
+            .filter(|func| used.contains(&func.header))
+            .cloned()
+            .collect();
+
+        // Return new circuit with reduced function list (and cloned templates)
+        Circuit {
+            wasm_producer: circuit.wasm_producer.clone(),
+            c_producer: circuit.c_producer.clone(),
+            llvm_data: circuit
+                .llvm_data
+                .clone_with_new_field_tracking(circuit.llvm_data.field_tracking.clone()),
+            templates: circuit.templates.iter().cloned().collect(),
+            functions,
+        }
+    }
+}
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index 581ea1180..847adfd64 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -47,6 +47,21 @@ pub const FR_PTR_CAST_I256_I32: &str = "cast_ptr_i256_i32";
 pub const FR_NULL_I256_ARR_PTR: &str = "null_i256_arr_ptr";
 pub const FR_NULL_I256_PTR: &str = "null_i256_ptr";
 
+thread_local!(
+    /// Names of all the "built-in" functions that can appear in the Circom IR
+    /// before the stage where LLVM IR is generated.
+    pub static BUILT_IN_NAMES: std::collections::HashSet<&'static str> = {
+        std::collections::HashSet::from([
+            FR_INDEX_ARR_PTR,
+            FR_IDENTITY_ARR_PTR,
+            FR_PTR_CAST_I32_I256,
+            FR_PTR_CAST_I256_I32,
+            FR_NULL_I256_ARR_PTR,
+            FR_NULL_I256_PTR,
+        ])
+    }
+);
+
 macro_rules! fr_nullary_op {
     ($name: expr, $producer: expr, $retTy: expr) => {{
         let func = create_function($producer, &None, 0, "", $name, $retTy.fn_type(&[], false));
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 7289f9826..5ab27bd50 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -167,7 +167,7 @@ impl<'a> ExtractedFunctionCtx<'a> {
             .into_int_value()
             .get_zero_extended_constant()
             .expect("must reference a constant argument index");
-        *self.args.get(num as usize).expect("must reference a known argument index")
+        *self.args.get(num as usize).expect("must reference a valid argument index")
     }
 }
 

From db5ec42c31e45294c7cc54d13356879a5c8d8ea7 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 23 Oct 2023 10:38:02 -0500
Subject: [PATCH 20/22] xfail the new test temporarily ~eyeroll~

---
 circom/tests/loops/fixed_idx_in_fixed_idx.circom | 1 +
 1 file changed, 1 insertion(+)

diff --git a/circom/tests/loops/fixed_idx_in_fixed_idx.circom b/circom/tests/loops/fixed_idx_in_fixed_idx.circom
index 6f34c84b3..3a9f6d140 100644
--- a/circom/tests/loops/fixed_idx_in_fixed_idx.circom
+++ b/circom/tests/loops/fixed_idx_in_fixed_idx.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 // This case initially triggered the "assert!(bucket_to_args.is_empty());" line in body_extractor.rs
 //  because the entire expression 'in[byte_order[i]]'' is replaced but the 'byte_order[i]' expression

From c1ee0becb47b9d5a88f90ac954d9558168403309 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 24 Oct 2023 15:03:27 -0500
Subject: [PATCH 21/22] Fix merge conflict and tests

---
 circom/tests/loops/assign_in_loop_1.circom       | 1 -
 circom/tests/loops/fixed_idx_in_fixed_idx.circom | 1 -
 circom/tests/loops/inner_conditional_11.circom   | 1 -
 circom/tests/subcmps/mapped.circom               | 1 -
 circom/tests/subcmps/mapped2.circom              | 1 -
 circom/tests/subcmps/mapped3.circom              | 1 -
 circom/tests/subcmps/mapped4.circom              | 1 -
 circom/tests/subcmps/subcmps3.circom             | 1 -
 circuit_passes/src/passes/unused_func_removal.rs | 8 +++++---
 9 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/circom/tests/loops/assign_in_loop_1.circom b/circom/tests/loops/assign_in_loop_1.circom
index 54fba3e15..acac850d7 100644
--- a/circom/tests/loops/assign_in_loop_1.circom
+++ b/circom/tests/loops/assign_in_loop_1.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template Inner() {
     signal input in;
diff --git a/circom/tests/loops/fixed_idx_in_fixed_idx.circom b/circom/tests/loops/fixed_idx_in_fixed_idx.circom
index 3a9f6d140..6f34c84b3 100644
--- a/circom/tests/loops/fixed_idx_in_fixed_idx.circom
+++ b/circom/tests/loops/fixed_idx_in_fixed_idx.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 // This case initially triggered the "assert!(bucket_to_args.is_empty());" line in body_extractor.rs
 //  because the entire expression 'in[byte_order[i]]'' is replaced but the 'byte_order[i]' expression
diff --git a/circom/tests/loops/inner_conditional_11.circom b/circom/tests/loops/inner_conditional_11.circom
index 12b303ad2..a18a080ee 100644
--- a/circom/tests/loops/inner_conditional_11.circom
+++ b/circom/tests/loops/inner_conditional_11.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template Sigma() {
     signal input inp;
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index ed3eab483..8e16e7307 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -2,7 +2,6 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index cfb12afd3..0e1670a01 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -2,7 +2,6 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index db960d89a..e2957eaff 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template ArrayOp(q) {
     signal input inp[15];
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index 9e9b00c16..a0a771958 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template MatrixOp(q) {
     signal input inp[5][3];
diff --git a/circom/tests/subcmps/subcmps3.circom b/circom/tests/subcmps/subcmps3.circom
index ec49bb91a..63eac590b 100644
--- a/circom/tests/subcmps/subcmps3.circom
+++ b/circom/tests/subcmps/subcmps3.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*     // TEMPORARY: because EXTRACT_LOOP_BODY_TO_NEW_FUNC == false
 
 template Sum(n) {
     signal input inp[n];
diff --git a/circuit_passes/src/passes/unused_func_removal.rs b/circuit_passes/src/passes/unused_func_removal.rs
index 54821052b..856072c83 100644
--- a/circuit_passes/src/passes/unused_func_removal.rs
+++ b/circuit_passes/src/passes/unused_func_removal.rs
@@ -157,9 +157,11 @@ impl CircuitTransformationPass for UnusedFuncRemovalPass<'_> {
         Circuit {
             wasm_producer: circuit.wasm_producer.clone(),
             c_producer: circuit.c_producer.clone(),
-            llvm_data: circuit
-                .llvm_data
-                .clone_with_new_field_tracking(circuit.llvm_data.field_tracking.clone()),
+            llvm_data: circuit.llvm_data.clone_with_updates(
+                circuit.llvm_data.field_tracking.clone(),
+                self.get_updated_bounded_array_loads(&circuit.llvm_data.bounded_array_loads),
+                self.get_updated_bounded_array_stores(&circuit.llvm_data.bounded_array_stores),
+            ),
             templates,
             functions,
         }

From 08d46e8ab6035bced199914049c33a368423c220 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 30 Oct 2023 17:03:25 -0500
Subject: [PATCH 22/22] address PR feedback

---
 .../tests/zzz/unreachable_code_crash.circom   | 32 ++++++++++---------
 .../env/extracted_func_env.rs                 |  4 +--
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/circom/tests/zzz/unreachable_code_crash.circom b/circom/tests/zzz/unreachable_code_crash.circom
index e63e9810e..2bcc5f528 100644
--- a/circom/tests/zzz/unreachable_code_crash.circom
+++ b/circom/tests/zzz/unreachable_code_crash.circom
@@ -1,24 +1,26 @@
 pragma circom 2.0.2;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
 
 template OR() {
     signal input a;
 }
 
+// This test demonstrates the need for the UnusedFuncRemovalPass.
+// Here's what happens without that pass. The outer loop unrolls first, producing 2 copies of its
+//  body because it has 2 iterations. In the second iteration (i.e. the second copy of the inner
+//  loop), the 'true' branch of the if-else can never execute, so that branch is dead code in the
+//  "loop.body" function generated for that copy of the inner loop. Because it is dead code, no
+//  parameter was added to the generated function to reference the destination of the StoreBucket
+//  in that branch. Consequently, the location information in that StoreBucket was not updated,
+//  leaving an invalid parameter reference that causes 'functions.rs::get_arg_ptr' to crash.
+//
 template InvalidArgIndex(n, k) {
     component has_prev_non_zero[k * n];
     for (var i = k - 1; i >= 0; i--) {
         for (var j = n - 1; j >= 0; j--) {
             has_prev_non_zero[n * i + j] = OR();
             if (i == k - 1 && j == n - 1) {
-                // StoreBucket here causes a crash in `get_arg_ptr` 
-                // Here's what happens. The outer loop unrolls first, 2 iterations. In the second
-                //  iteration, this branch of the if-else will never execute so in the generated
-                //  "loop.body" function, this branch is dead code, thus no parameter was added
-                //  to the function to reference the destination of this StoreBucket and the 
-                //  location information was not updated so there is an invalid parameter reference
-                //  that causes 'functions.rs::get_arg_ptr' to crash, but it's in dead code.
                 has_prev_non_zero[n * i + j].a <-- 99;
             } else {
                 has_prev_non_zero[n * i + j].a <-- 33;
@@ -32,10 +34,10 @@ component main = InvalidArgIndex(3, 2);
 //// Check that only the proper versions of the generated functions remain
 //// (i.e. the initial one was removed after conditional flattening).
 //
-//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
-//CHECK:     define void @..generated..loop.body.[[[0-9]+]].F(
-//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
-//CHECK:     define void @..generated..loop.body.[[[0-9]+]].T(
-//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
-//CHECK:     define void @..generated..loop.body.[[[0-9]+]].F.T(
-//CHECK-NOT: define void @..generated..loop.body.[[[0-9]+]](
+//CHECK-NOT: define void @..generated..loop.body.{{[0-9]+}}(
+//CHECK:     define void @..generated..loop.body.[[NAME_1:[0-9]+]].F(
+//CHECK-NOT: define void @..generated..loop.body.{{[0-9]+}}(
+//CHECK:     define void @..generated..loop.body.[[NAME_1]].T(
+//CHECK-NOT: define void @..generated..loop.body.{{[0-9]+}}(
+//CHECK:     define void @..generated..loop.body.[[NAME_2:[0-9]+]].F.T(
+//CHECK-NOT: define void @..generated..loop.body.{{[0-9]+}}(
diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 24251ed66..7e5413be1 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -207,7 +207,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
                         if counter_override {
-                            todo!()
+                            unreachable!() // there is no counter for a counter reference
                         } else {
                             self.base.subcmp_counter_is_zero(subcmp)
                         }
@@ -238,7 +238,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                             _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
                         };
                         if counter_override {
-                            todo!()
+                            unreachable!() // there is no counter for a counter reference
                         } else {
                             self.base.subcmp_counter_equal_to(subcmp, value)
                         }