diff --git a/src/arithmetic.rs b/src/arithmetic.rs
index e3dc6c4489..4b9dfde397 100644
--- a/src/arithmetic.rs
+++ b/src/arithmetic.rs
@@ -17,11 +17,9 @@ mod constant;
 #[cfg(feature = "alloc")]
 pub mod bigint;
 
+mod error;
 pub mod montgomery;
 
 mod n0;
-
-#[allow(dead_code)]
-const BIGINT_MODULUS_MAX_LIMBS: usize = 8192 / crate::limb::LIMB_BITS;
-
 pub use constant::limbs_from_hex;
+pub(crate) use error::ImpossibleLengthError;
diff --git a/src/arithmetic/bigint.rs b/src/arithmetic/bigint.rs
index b326c35e74..a5d79c1f3e 100644
--- a/src/arithmetic/bigint.rs
+++ b/src/arithmetic/bigint.rs
@@ -41,6 +41,7 @@ pub(crate) use self::{
     modulus::{Modulus, OwnedModulus, MODULUS_MAX_LIMBS},
     private_exponent::PrivateExponent,
 };
+use super::ImpossibleLengthError;
 use crate::{
     arithmetic::montgomery::*,
     bits::BitLength,
@@ -94,10 +95,10 @@ fn from_montgomery_amm<M>(limbs: BoxedLimbs<M>, m: &Modulus<M>) -> Elem<M, Unenc
     debug_assert_eq!(limbs.len(), m.limbs().len());
 
     let mut limbs = limbs;
-    let mut one = [0; MODULUS_MAX_LIMBS];
+    let mut one = [0; MAX_LIMBS];
     one[0] = 1;
     let one = &one[..m.limbs().len()];
-    limbs_mont_mul(&mut limbs, one, m.limbs(), m.n0(), m.cpu_features());
+    limbs_mont_mul(&mut limbs, one, m.limbs(), m.n0(), m.cpu_features()).unwrap();
     Elem {
         limbs,
         encoding: PhantomData,
@@ -142,7 +143,7 @@ pub fn elem_mul<M, AF, BF>(
 where
     (AF, BF): ProductEncoding,
 {
-    limbs_mont_mul(&mut b.limbs, &a.limbs, m.limbs(), m.n0(), m.cpu_features());
+    limbs_mont_mul(&mut b.limbs, &a.limbs, m.limbs(), m.n0(), m.cpu_features()).unwrap();
     Elem {
         limbs: b.limbs,
         encoding: PhantomData,
@@ -187,7 +188,7 @@ pub fn elem_reduced<Larger, Smaller>(
     // `limbs_from_mont_in_place` requires this.
     assert_eq!(a.limbs.len(), m.limbs().len() * 2);
 
-    let mut tmp = [0; MODULUS_MAX_LIMBS];
+    let mut tmp = [0; MAX_LIMBS];
     let tmp = &mut tmp[..a.limbs.len()];
     tmp.copy_from_slice(&a.limbs);
 
@@ -203,7 +204,7 @@ fn elem_squared<M, E>(
 where
     (E, E): ProductEncoding,
 {
-    limbs_mont_square(&mut a.limbs, m.limbs(), m.n0(), m.cpu_features());
+    limbs_mont_square(&mut a.limbs, m.limbs(), m.n0(), m.cpu_features()).unwrap();
     Elem {
         limbs: a.limbs,
         encoding: PhantomData,
@@ -404,7 +405,7 @@ pub fn elem_exp_consttime<M>(
     base: Elem<M, R>,
     exponent: &PrivateExponent,
     m: &Modulus<M>,
-) -> Result<Elem<M, Unencoded>, error::Unspecified> {
+) -> Result<Elem<M, Unencoded>, ImpossibleLengthError> {
     use crate::{bssl, limb::Window};
 
     const WINDOW_BITS: usize = 5;
@@ -465,7 +466,7 @@ pub fn elem_exp_consttime<M>(
         let src1 = entry(previous, src1, num_limbs);
         let src2 = entry(previous, src2, num_limbs);
         let dst = entry_mut(rest, 0, num_limbs);
-        limbs_mont_product(dst, src1, src2, m.limbs(), m.n0(), m.cpu_features());
+        limbs_mont_product(dst, src1, src2, m.limbs(), m.n0(), m.cpu_features())?;
     }
 
     let tmp = m.zero();
@@ -490,7 +491,7 @@ pub fn elem_exp_consttime<M>(
     base: Elem<M, R>,
     exponent: &PrivateExponent,
     m: &Modulus<M>,
-) -> Result<Elem<M, Unencoded>, error::Unspecified> {
+) -> Result<Elem<M, Unencoded>, ImpossibleLengthError> {
     use crate::{cpu, limb::LIMB_BYTES};
 
     // Pretty much all the math here requires CPU feature detection to have
@@ -629,15 +630,16 @@ pub fn elem_exp_consttime<M>(
         mut i: Window,
         num_limbs: usize,
         cpu_features: cpu::Features,
-    ) {
+    ) -> Result<(), ImpossibleLengthError> {
         loop {
             scatter(table, acc, i, num_limbs);
             i *= 2;
             if i >= (TABLE_ENTRIES as Window) {
                 break;
             }
-            limbs_mont_square(acc, m_cached, n0, cpu_features);
+            limbs_mont_square(acc, m_cached, n0, cpu_features)?;
         }
+        Ok(())
     }
 
     // All entries in `table` will be Montgomery encoded.
@@ -650,12 +652,12 @@ pub fn elem_exp_consttime<M>(
     acc.copy_from_slice(base_cached);
 
     // Fill in entries 1, 2, 4, 8, 16.
-    scatter_powers_of_2(table, acc, m_cached, n0, 1, num_limbs, cpu_features);
+    scatter_powers_of_2(table, acc, m_cached, n0, 1, num_limbs, cpu_features)?;
     // Fill in entries 3, 6, 12, 24; 5, 10, 20, 30; 7, 14, 28; 9, 18; 11, 22; 13, 26; 15, 30;
     // 17; 19; 21; 23; 25; 27; 29; 31.
     for i in (3..(TABLE_ENTRIES as Window)).step_by(2) {
         limbs_mul_mont_gather5_amm(table, acc, base_cached, m_cached, n0, i - 1, num_limbs);
-        scatter_powers_of_2(table, acc, m_cached, n0, i, num_limbs, cpu_features);
+        scatter_powers_of_2(table, acc, m_cached, n0, i, num_limbs, cpu_features)?;
     }
 
     let acc = limb::fold_5_bit_windows(
diff --git a/src/arithmetic/bigint/modulus.rs b/src/arithmetic/bigint/modulus.rs
index 3f87053c01..74cffb656b 100644
--- a/src/arithmetic/bigint/modulus.rs
+++ b/src/arithmetic/bigint/modulus.rs
@@ -12,7 +12,7 @@
 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
-use super::{BoxedLimbs, Elem, PublicModulus, Unencoded, N0};
+use super::{super::montgomery, BoxedLimbs, Elem, PublicModulus, Unencoded, N0};
 use crate::{
     bits::BitLength,
     cpu, error,
@@ -21,14 +21,7 @@ use crate::{
 };
 use core::marker::PhantomData;
 
-/// The x86 implementation of `bn_mul_mont`, at least, requires at least 4
-/// limbs. For a long time we have required 4 limbs for all targets, though
-/// this may be unnecessary. TODO: Replace this with
-/// `n.len() < 256 / LIMB_BITS` so that 32-bit and 64-bit platforms behave the
-/// same.
-pub const MODULUS_MIN_LIMBS: usize = 4;
-
-pub const MODULUS_MAX_LIMBS: usize = super::super::BIGINT_MODULUS_MAX_LIMBS;
+pub const MODULUS_MAX_LIMBS: usize = montgomery::MAX_LIMBS;
 
 /// The modulus *m* for a ring ℤ/mℤ, along with the precomputed values needed
 /// for efficient Montgomery multiplication modulo *m*. The value must be odd
@@ -93,7 +86,7 @@ impl<M> OwnedModulus<M> {
         if n.len() > MODULUS_MAX_LIMBS {
             return Err(error::KeyRejected::too_large());
         }
-        if n.len() < MODULUS_MIN_LIMBS {
+        if n.len() < montgomery::MIN_LIMBS {
             return Err(error::KeyRejected::unexpected_error());
         }
         if limb::limbs_are_even_constant_time(&n) != LimbMask::False {
diff --git a/src/arithmetic/error.rs b/src/arithmetic/error.rs
new file mode 100644
index 0000000000..656ba4aa4d
--- /dev/null
+++ b/src/arithmetic/error.rs
@@ -0,0 +1,32 @@
+// Copyright 2023 Brian Smith.
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+use crate::error;
+
+/// `ImpossibleLengthError` reports a violated length precondition; it should never occur.
+#[derive(Debug)]
+pub struct ImpossibleLengthError(());
+
+impl ImpossibleLengthError {
+    pub(super) fn new() -> Self {
+        // NOTE(review): presumably not `unreachable!()` so callers can use `?`; confirm.
+        Self(())
+    }
+}
+
+impl From<ImpossibleLengthError> for error::Unspecified {
+    fn from(_: ImpossibleLengthError) -> Self {
+        Self
+    }
+}
diff --git a/src/arithmetic/montgomery.rs b/src/arithmetic/montgomery.rs
index b3bed1b14c..85de1027fd 100644
--- a/src/arithmetic/montgomery.rs
+++ b/src/arithmetic/montgomery.rs
@@ -13,6 +13,7 @@
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 pub use super::n0::N0;
+use super::ImpossibleLengthError;
 use crate::cpu;
 
 // Indicates that the element is not encoded; there is no *R* factor
@@ -112,17 +113,33 @@ impl ProductEncoding for (RRR, RInverse) {
 #[allow(unused_imports)]
 use crate::{bssl, c, limb::Limb};
 
+/// The x86 implementation of `bn_mul_mont` (and possibly others) requires at
+/// least 4 limbs. For a long time we have required 4 limbs for all targets,
+/// though this may be unnecessary. TODO: Replace this with `256 / LIMB_BITS`
+/// (i.e. reject `n.len() < 256 / LIMB_BITS`) so that 32-bit and 64-bit
+/// platforms behave the same.
+pub const MIN_LIMBS: usize = 4;
+
+/// Many functions, including assembly functions, stack-allocate
+/// `n * MAX_LIMBS` limbs (for some `n`) to store temporary values. Reduce the
+/// chance of stack overflows by capping operand sizes at the largest modulus
+/// we wish to support: 8192 bits.
+pub const MAX_LIMBS: usize = 8192 / crate::limb::LIMB_BITS;
+
 #[inline(always)]
 unsafe fn mul_mont(
     r: *mut Limb,
     a: *const Limb,
     b: *const Limb,
-    n: *const Limb,
+    m: &[Limb],
     n0: &N0,
-    num_limbs: c::size_t,
     _: cpu::Features,
-) {
-    bn_mul_mont(r, a, b, n, n0, num_limbs)
+) -> Result<(), ImpossibleLengthError> {
+    if m.len() < MIN_LIMBS || m.len() > MAX_LIMBS {
+        return Err(ImpossibleLengthError::new());
+    }
+    bn_mul_mont(r, a, b, m.as_ptr(), n0, m.len()); // assumes r, a, b span m.len() limbs
+    Ok(())
 }
 
 #[cfg(not(any(
@@ -149,7 +166,7 @@ prefixed_export! {
         // Nothing aliases `n`
         let n = unsafe { core::slice::from_raw_parts(n, num_limbs) };
 
-        let mut tmp = [0; 2 * super::BIGINT_MODULUS_MAX_LIMBS];
+        let mut tmp = [0; 2 * MAX_LIMBS];
         let tmp = &mut tmp[..(2 * num_limbs)];
         {
             let a: &[Limb] = unsafe { core::slice::from_raw_parts(a, num_limbs) };
@@ -257,20 +274,11 @@ pub(super) fn limbs_mont_mul(
     m: &[Limb],
     n0: &N0,
     cpu_features: cpu::Features,
-) {
-    debug_assert_eq!(r.len(), m.len());
-    debug_assert_eq!(a.len(), m.len());
-    unsafe {
-        mul_mont(
-            r.as_mut_ptr(),
-            r.as_ptr(),
-            a.as_ptr(),
-            m.as_ptr(),
-            n0,
-            r.len(),
-            cpu_features,
-        )
+) -> Result<(), ImpossibleLengthError> {
+    if r.len() != m.len() || a.len() != m.len() {
+        return Err(ImpossibleLengthError::new());
     }
+    unsafe { mul_mont(r.as_mut_ptr(), r.as_ptr(), a.as_ptr(), m, n0, cpu_features) }
 }
 
 /// r = a * b
@@ -282,39 +290,26 @@ pub(super) fn limbs_mont_product(
     m: &[Limb],
     n0: &N0,
     cpu_features: cpu::Features,
-) {
-    debug_assert_eq!(r.len(), m.len());
-    debug_assert_eq!(a.len(), m.len());
-    debug_assert_eq!(b.len(), m.len());
-
-    unsafe {
-        mul_mont(
-            r.as_mut_ptr(),
-            a.as_ptr(),
-            b.as_ptr(),
-            m.as_ptr(),
-            n0,
-            r.len(),
-            cpu_features,
-        )
+) -> Result<(), ImpossibleLengthError> {
+    if r.len() != m.len() || a.len() != m.len() || b.len() != m.len() {
+        return Err(ImpossibleLengthError::new());
     }
+    unsafe { mul_mont(r.as_mut_ptr(), a.as_ptr(), b.as_ptr(), m, n0, cpu_features) }
 }
 
 /// r = r**2
-pub(super) fn limbs_mont_square(r: &mut [Limb], m: &[Limb], n0: &N0, cpu_features: cpu::Features) {
-    debug_assert_eq!(r.len(), m.len());
-    unsafe {
-        mul_mont(
-            r.as_mut_ptr(),
-            r.as_ptr(),
-            r.as_ptr(),
-            m.as_ptr(),
-            n0,
-            r.len(),
-            cpu_features,
-        )
+pub(super) fn limbs_mont_square(
+    r: &mut [Limb],
+    m: &[Limb],
+    n0: &N0,
+    cpu_features: cpu::Features,
+) -> Result<(), ImpossibleLengthError> {
+    if r.len() != m.len() {
+        return Err(ImpossibleLengthError::new());
     }
+    unsafe { mul_mont(r.as_mut_ptr(), r.as_ptr(), r.as_ptr(), m, n0, cpu_features) }
 }
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -338,7 +333,7 @@ mod tests {
         ];
 
         for (i, (r_input, a, w, expected_retval, expected_r)) in TEST_CASES.iter().enumerate() {
-            let mut r = [0; super::super::BIGINT_MODULUS_MAX_LIMBS];
+            let mut r = [0; MAX_LIMBS];
             let r = {
                 let r = &mut r[..r_input.len()];
                 r.copy_from_slice(r_input);
diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs
index 57e4c7aaa1..af4f25fd45 100644
--- a/src/ec/suite_b/ops.rs
+++ b/src/ec/suite_b/ops.rs
@@ -12,7 +12,12 @@
 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
-use crate::{arithmetic::limbs_from_hex, arithmetic::montgomery::*, error, limb::*};
+use crate::{
+    arithmetic::limbs_from_hex,
+    arithmetic::montgomery::{Encoding, ProductEncoding, Unencoded, R, RR},
+    error,
+    limb::*,
+};
 use core::marker::PhantomData;
 
 pub use self::elem::*;
diff --git a/src/ec/suite_b/ops/elem.rs b/src/ec/suite_b/ops/elem.rs
index e8479f2af6..7b408e09c9 100644
--- a/src/ec/suite_b/ops/elem.rs
+++ b/src/ec/suite_b/ops/elem.rs
@@ -15,7 +15,7 @@
 use crate::{
     arithmetic::{
         limbs_from_hex,
-        montgomery::{Encoding, ProductEncoding},
+        montgomery::{self, Encoding, ProductEncoding},
     },
     limb::{Limb, LIMB_BITS},
 };
@@ -129,3 +129,6 @@ pub fn unary_op_from_binary_op_assign<M, E: Encoding>(
 }
 
 pub const MAX_LIMBS: usize = (384 + (LIMB_BITS - 1)) / LIMB_BITS;
+
+#[allow(clippy::assertions_on_constants)] // deliberate compile-time `<=` bound check
+const _MAX_LIMBS_IS_LESS_THAN_MAX_LIMBS: () = assert!(MAX_LIMBS <= montgomery::MAX_LIMBS);
diff --git a/src/rsa/keypair.rs b/src/rsa/keypair.rs
index a2012789e6..fc43cb36f8 100644
--- a/src/rsa/keypair.rs
+++ b/src/rsa/keypair.rs
@@ -21,6 +21,7 @@ use crate::{
     arithmetic::{
         bigint,
         montgomery::{R, RR, RRR},
+        ImpossibleLengthError,
     },
     bits::BitLength,
     cpu, digest,
@@ -487,7 +488,7 @@ fn elem_exp_consttime<M>(
     p: &PrivateCrtPrime<M>,
     other_prime_len_bits: BitLength,
     cpu_features: cpu::Features,
-) -> Result<bigint::Elem<M>, error::Unspecified> {
+) -> Result<bigint::Elem<M>, ImpossibleLengthError> {
     let m = &p.modulus.modulus(cpu_features);
     let c_mod_m = bigint::elem_reduced(c, m, other_prime_len_bits);
     let c_mod_m = bigint::elem_mul(p.oneRRR.as_ref(), c_mod_m, m);