diff --git a/.github/workflows/dependabot_automerge.yml b/.github/workflows/dependabot_automerge.yml
index 0fd990a2..4fe3451b 100644
--- a/.github/workflows/dependabot_automerge.yml
+++ b/.github/workflows/dependabot_automerge.yml
@@ -16,7 +16,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v1
+        uses: dependabot/fetch-metadata@v2
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
       - name: Enable auto-merge for Dependabot PRs
diff --git a/Cargo.toml b/Cargo.toml
index 4c8034c5..04dccd59 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,12 +1,4 @@
-[package]
-authors    =["Pluto Authors"]
-description="""ronkathon"""
-edition    ="2021"
-license    ="Apache2.0 OR MIT"
-name       ="ronkathon"
-repository ="https://github.com/thor314/ronkathon"
-version    ="0.1.0"
+[workspace]
+resolver="2"
 
-[dependencies]
-anyhow  ="1.0"
-p3-field={ git="https://github.com/Plonky3/Plonky3.git" }
+members=["ronkathon", "field", "util"]
diff --git a/field/Cargo.toml b/field/Cargo.toml
new file mode 100644
index 00000000..33faa451
--- /dev/null
+++ b/field/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "p3-field"
+version = "0.1.0"
+edition = "2021"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+p3-util = { path = "../util" }
+num-bigint = { version = "0.4.3", default-features = false }
+num-traits = { version = "0.2.18", default-features = false }
+
+itertools = "0.12.0"
+rand = "0.8.5"
+serde = { version = "1.0", default-features = false, features = ["derive"] }
diff --git a/field/src/array.rs b/field/src/array.rs
new file mode 100644
index 00000000..614eb4be
--- /dev/null
+++ b/field/src/array.rs
@@ -0,0 +1,148 @@
+use core::{
+  array,
+  iter::{Product, Sum},
+  ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
+};
+
+use crate::{AbstractField, Field};
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct FieldArray<F: Field, const N: usize>(pub [F; N]);
+
+impl<F: Field, const N: usize> Default for FieldArray<F, N> {
+  fn default() -> Self { Self::zero() }
+}
+
+impl<F: Field, const N: usize> From<F> for FieldArray<F, N> {
+  fn from(val: F) -> Self { [val; N].into() }
+}
+
+impl<F: Field, const N: usize> From<[F; N]> for FieldArray<F, N> {
+  fn from(arr: [F; N]) -> Self { Self(arr) }
+}
+
+impl<F: Field, const N: usize> AbstractField for FieldArray<F, N> {
+  type F = F;
+
+  fn zero() -> Self { FieldArray([F::zero(); N]) }
+
+  fn one() -> Self { FieldArray([F::one(); N]) }
+
+  fn two() -> Self { FieldArray([F::two(); N]) }
+
+  fn neg_one() -> Self { FieldArray([F::neg_one(); N]) }
+
+  #[inline]
+  fn from_f(f: Self::F) -> Self { f.into() }
+
+  fn from_bool(b: bool) -> Self { [F::from_bool(b); N].into() }
+
+  fn from_canonical_u8(n: u8) -> Self { [F::from_canonical_u8(n); N].into() }
+
+  fn from_canonical_u16(n: u16) -> Self { [F::from_canonical_u16(n); N].into() }
+
+  fn from_canonical_u32(n: u32) -> Self { [F::from_canonical_u32(n); N].into() }
+
+  fn from_canonical_u64(n: u64) -> Self { [F::from_canonical_u64(n); N].into() }
+
+  fn from_canonical_usize(n: usize) -> Self { [F::from_canonical_usize(n); N].into() }
+
+  fn from_wrapped_u32(n: u32) -> Self { [F::from_wrapped_u32(n); N].into() }
+
+  fn from_wrapped_u64(n: u64) -> Self { [F::from_wrapped_u64(n); N].into() }
+
+  fn generator() -> Self { [F::generator(); N].into() }
+}
+
+impl<F: Field, const N: usize> Add for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn add(self, rhs: Self) -> Self::Output { array::from_fn(|i| self.0[i] + rhs.0[i]).into() }
+}
+
+impl<F: Field, const N: usize> Add<F> for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn add(self, rhs: F) -> Self::Output { self.0.map(|x| x + rhs).into() }
+}
+
+impl<F: Field, const N: usize> AddAssign for FieldArray<F, N> {
+  #[inline]
+  fn add_assign(&mut self, rhs: Self) { self.0.iter_mut().zip(rhs.0).for_each(|(x, y)| *x += y); }
+}
+
+impl<F: Field, const N: usize> AddAssign<F> for FieldArray<F, N> {
+  #[inline]
+  fn add_assign(&mut self, rhs: F) { self.0.iter_mut().for_each(|x| *x += rhs); }
+}
+
+impl<F: Field, const N: usize> Sub for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn sub(self, rhs: Self) -> Self::Output { array::from_fn(|i| self.0[i] - rhs.0[i]).into() }
+}
+
+impl<F: Field, const N: usize> Sub<F> for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn sub(self, rhs: F) -> Self::Output { self.0.map(|x| x - rhs).into() }
+}
+
+impl<F: Field, const N: usize> SubAssign for FieldArray<F, N> {
+  #[inline]
+  fn sub_assign(&mut self, rhs: Self) { self.0.iter_mut().zip(rhs.0).for_each(|(x, y)| *x -= y); }
+}
+
+impl<F: Field, const N: usize> SubAssign<F> for FieldArray<F, N> {
+  #[inline]
+  fn sub_assign(&mut self, rhs: F) { self.0.iter_mut().for_each(|x| *x -= rhs); }
+}
+
+impl<F: Field, const N: usize> Neg for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn neg(self) -> Self::Output { self.0.map(|x| -x).into() }
+}
+
+impl<F: Field, const N: usize> Mul for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn mul(self, rhs: Self) -> Self::Output { array::from_fn(|i| self.0[i] * rhs.0[i]).into() }
+}
+
+impl<F: Field, const N: usize> Mul<F> for FieldArray<F, N> {
+  type Output = Self;
+
+  #[inline]
+  fn mul(self, rhs: F) -> Self::Output { self.0.map(|x| x * rhs).into() }
+}
+
+impl<F: Field, const N: usize> MulAssign for FieldArray<F, N> {
+  #[inline]
+  fn mul_assign(&mut self, rhs: Self) { self.0.iter_mut().zip(rhs.0).for_each(|(x, y)| *x *= y); }
+}
+
+impl<F: Field, const N: usize> MulAssign<F> for FieldArray<F, N> {
+  #[inline]
+  fn mul_assign(&mut self, rhs: F) { self.0.iter_mut().for_each(|x| *x *= rhs); }
+}
+
+impl<F: Field, const N: usize> Sum for FieldArray<F, N> {
+  #[inline]
+  fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+    iter.reduce(|lhs, rhs| lhs + rhs).unwrap_or(Self::zero())
+  }
+}
+
+impl<F: Field, const N: usize> Product for FieldArray<F, N> {
+  #[inline]
+  fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
+    iter.reduce(|lhs, rhs| lhs * rhs).unwrap_or(Self::one())
+  }
+}
diff --git a/field/src/batch_inverse.rs b/field/src/batch_inverse.rs
new file mode 100644
index 00000000..dd591bf7
--- /dev/null
+++ b/field/src/batch_inverse.rs
@@ -0,0 +1,93 @@
+use alloc::{vec, vec::Vec};
+
+use crate::field::Field;
+
+/// Batch multiplicative inverses with Montgomery's trick
+/// This is Montgomery's trick. At a high level, we invert the product of the given field
+/// elements, then derive the individual inverses from that via multiplication.
+///
+/// The usual Montgomery trick involves calculating an array of cumulative products,
+/// resulting in a long dependency chain. To increase instruction-level parallelism, we
+/// compute WIDTH separate cumulative product arrays that only meet at the end.
+///
+/// # Panics
+/// Might panic if asserts or unwraps uncover a bug.
+pub fn batch_multiplicative_inverse<F: Field>(x: &[F]) -> Vec<F> {
+  // Higher WIDTH increases instruction-level parallelism, but too high a value will cause us
+  // to run out of registers.
+  const WIDTH: usize = 4;
+  // JN note: WIDTH is 4. The code is specialized to this value and will need
+  // modification if it is changed. I tried to make it more generic, but Rust's const
+  // generics are not yet good enough.
+
+  // Handle special cases. Paradoxically, below is repetitive but concise.
+  // The branches should be very predictable.
+  let n = x.len();
+  if n == 0 {
+    return Vec::new();
+  } else if n == 1 {
+    return vec![x[0].inverse()];
+  } else if n == 2 {
+    let x01 = x[0] * x[1];
+    let x01inv = x01.inverse();
+    return vec![x01inv * x[1], x01inv * x[0]];
+  } else if n == 3 {
+    let x01 = x[0] * x[1];
+    let x012 = x01 * x[2];
+    let x012inv = x012.inverse();
+    let x01inv = x012inv * x[2];
+    return vec![x01inv * x[1], x01inv * x[0], x012inv * x01];
+  }
+  debug_assert!(n >= WIDTH);
+
+  // Buf is reused for a few things to save allocations.
+  // Fill buf with cumulative product of x, only taking every 4th value. Concretely, buf will
+  // be [
+  //   x[0], x[1], x[2], x[3],
+  //   x[0] * x[4], x[1] * x[5], x[2] * x[6], x[3] * x[7],
+  //   x[0] * x[4] * x[8], x[1] * x[5] * x[9], x[2] * x[6] * x[10], x[3] * x[7] * x[11],
+  //   ...
+  // ].
+  // If n is not a multiple of WIDTH, the result is truncated from the end. For example,
+  // for n == 5, we get [x[0], x[1], x[2], x[3], x[0] * x[4]].
+  let mut buf: Vec<F> = Vec::with_capacity(n);
+  // cumul_prod holds the last WIDTH elements of buf. This is redundant, but it's how we
+  // convince LLVM to keep the values in the registers.
+  let mut cumul_prod: [F; WIDTH] = x[..WIDTH].try_into().unwrap();
+  buf.extend(cumul_prod);
+  for (i, &xi) in x[WIDTH..].iter().enumerate() {
+    cumul_prod[i % WIDTH] *= xi;
+    buf.push(cumul_prod[i % WIDTH]);
+  }
+  debug_assert_eq!(buf.len(), n);
+
+  let mut a_inv = {
+    // This is where the four dependency chains meet.
+    // Take the last four elements of buf and invert them all.
+    let c01 = cumul_prod[0] * cumul_prod[1];
+    let c23 = cumul_prod[2] * cumul_prod[3];
+    let c0123 = c01 * c23;
+    let c0123inv = c0123.inverse();
+    let c01inv = c0123inv * c23;
+    let c23inv = c0123inv * c01;
+    [c01inv * cumul_prod[1], c01inv * cumul_prod[0], c23inv * cumul_prod[3], c23inv * cumul_prod[2]]
+  };
+
+  for i in (WIDTH..n).rev() {
+    // buf[i - WIDTH] has not been written to by this loop, so it equals
+    // x[i % WIDTH] * x[i % WIDTH + WIDTH] * ... * x[i - WIDTH].
+    buf[i] = buf[i - WIDTH] * a_inv[i % WIDTH];
+    // buf[i] now holds the inverse of x[i].
+    a_inv[i % WIDTH] *= x[i];
+  }
+  for i in (0..WIDTH).rev() {
+    buf[i] = a_inv[i];
+  }
+
+  for (&bi, &xi) in buf.iter().zip(x) {
+    // Sanity check only.
+    debug_assert_eq!(bi * xi, F::one());
+  }
+
+  buf
+}
diff --git a/field/src/exponentiation.rs b/field/src/exponentiation.rs
new file mode 100644
index 00000000..f9febeb1
--- /dev/null
+++ b/field/src/exponentiation.rs
@@ -0,0 +1,122 @@
+use crate::AbstractField;
+
+pub fn exp_u64_by_squaring<AF: AbstractField>(val: AF, power: u64) -> AF {
+  let mut current = val;
+  let mut product = AF::one();
+
+  for j in 0..bits_u64(power) {
+    if (power >> j & 1) != 0 {
+      product *= current.clone();
+    }
+    current = current.square();
+  }
+  product
+}
+
+const fn bits_u64(n: u64) -> usize { (64 - n.leading_zeros()) as usize }
+
+pub fn exp_1717986917<AF: AbstractField>(val: AF) -> AF {
+  // Note that 5 * 1717986917 = 4*(2^31 - 2) + 1 = 1 mod p - 1.
+  // Thus as a^{p - 1} = 1 for all a \in F_p, (a^{1717986917})^5 = a.
+  // Note the binary expansion: 1717986917 = 1100110011001100110011001100101_2
+  // This uses 30 Squares + 7 Multiplications => 37 Operations total.
+  // Suspect it's possible to improve this with enough effort. For example 1717986918 takes only 4
+  // Multiplications.
+  let p1 = val;
+  let p10 = p1.square();
+  let p11 = p10.clone() * p1;
+  let p101 = p10 * p11.clone();
+  let p110000 = p11.exp_power_of_2(4);
+  let p110011 = p110000 * p11.clone();
+  let p11001100000000 = p110011.exp_power_of_2(8);
+  let p11001100110011 = p11001100000000.clone() * p110011;
+  let p1100110000000000000000 = p11001100000000.exp_power_of_2(8);
+  let p1100110011001100110011 = p1100110000000000000000 * p11001100110011;
+  let p11001100110011001100110000 = p1100110011001100110011.exp_power_of_2(4);
+  let p11001100110011001100110011 = p11001100110011001100110000 * p11;
+  let p1100110011001100110011001100000 = p11001100110011001100110011.exp_power_of_2(5);
+  p1100110011001100110011001100000 * p101
+}
+
+pub fn exp_1420470955<AF: AbstractField>(val: AF) -> AF {
+  // Note that 3 * 1420470955 = 2*(2^31 - 2^24) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all a \in F_p, (a^{1420470955})^3 = a.
+  // Note the binary expansion: 1420470955 = 1010100101010101010101010101011_2
+  // This uses 29 Squares + 7 Multiplications => 36 Operations total.
+  // Suspect it's possible to improve this with enough effort.
+  let p1 = val;
+  let p100 = p1.exp_power_of_2(2);
+  let p101 = p100.clone() * p1.clone();
+  let p10000 = p100.exp_power_of_2(2);
+  let p10101 = p10000 * p101;
+  let p10101000000 = p10101.clone().exp_power_of_2(6);
+  let p10101010101 = p10101000000.clone() * p10101.clone();
+  let p101010010101 = p10101000000 * p10101010101.clone();
+  let p101010010101000000000000 = p101010010101.exp_power_of_2(12);
+  let p101010010101010101010101 = p101010010101000000000000 * p10101010101;
+  let p101010010101010101010101000000 = p101010010101010101010101.exp_power_of_2(6);
+  let p101010010101010101010101010101 = p101010010101010101010101000000 * p10101;
+  let p1010100101010101010101010101010 = p101010010101010101010101010101.square();
+  p1010100101010101010101010101010 * p1.clone()
+}
+
+pub fn exp_1725656503<AF: AbstractField>(val: AF) -> AF {
+  // Note that 7 * 1725656503 = 6*(2^31 - 2^27) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all a \in F_p, (a^{1725656503})^7 = a.
+  // Note the binary expansion: 1725656503 = 1100110110110110110110110110111_2
+  // This uses 29 Squares + 8 Multiplications => 37 Operations total.
+  // Suspect it's possible to improve this with enough effort.
+  let p1 = val;
+  let p10 = p1.square();
+  let p11 = p10 * p1.clone();
+  let p110 = p11.square();
+  let p111 = p110.clone() * p1;
+  let p11000 = p110.exp_power_of_2(2);
+  let p11011 = p11000.clone() * p11;
+  let p11000000 = p11000.exp_power_of_2(3);
+  let p11011011 = p11000000.clone() * p11011;
+  let p110011011 = p11011011.clone() * p11000000;
+  let p110011011000000000 = p110011011.exp_power_of_2(9);
+  let p110011011011011011 = p110011011000000000 * p11011011.clone();
+  let p110011011011011011000000000 = p110011011011011011.exp_power_of_2(9);
+  let p110011011011011011011011011 = p110011011011011011000000000 * p11011011;
+  let p1100110110110110110110110110000 = p110011011011011011011011011.exp_power_of_2(4);
+  p1100110110110110110110110110000 * p111
+}
+
+pub fn exp_10540996611094048183<AF: AbstractField>(val: AF) -> AF {
+  // Note that 7*10540996611094048183 = 4*(2^64 - 2**32) + 1 = 1 mod (p - 1).
+  // Thus as a^{p - 1} = 1 for all a \in F_p, (a^{10540996611094048183})^7 = a.
+  // Also: 10540996611094048183 =
+  // 1001001001001001001001001001000110110110110110110110110110110111_2. This uses 63 Squares + 8
+  // Multiplications => 71 Operations total. Suspect it's possible to improve this a little with
+  // enough effort.
+  let p1 = val;
+  let p10 = p1.square();
+  let p11 = p10.clone() * p1.clone();
+  let p100 = p10.square();
+  let p111 = p100.clone() * p11.clone();
+  let p100000000000000000000000000000000 = p100.exp_power_of_2(30);
+  let p100000000000000000000000000000011 = p100000000000000000000000000000000 * p11;
+  let p100000000000000000000000000000011000 = p100000000000000000000000000000011.exp_power_of_2(3);
+  let p100100000000000000000000000000011011 =
+    p100000000000000000000000000000011000 * p100000000000000000000000000000011;
+  let p100100000000000000000000000000011011000000 =
+    p100100000000000000000000000000011011.exp_power_of_2(6);
+  let p100100100100000000000000000000011011011011 =
+    p100100000000000000000000000000011011000000 * p100100000000000000000000000000011011.clone();
+  let p100100100100000000000000000000011011011011000000000000 =
+    p100100100100000000000000000000011011011011.exp_power_of_2(12);
+  let p100100100100100100100100000000011011011011011011011011 =
+    p100100100100000000000000000000011011011011000000000000
+      * p100100100100000000000000000000011011011011;
+  let p100100100100100100100100000000011011011011011011011011000000 =
+    p100100100100100100100100000000011011011011011011011011.exp_power_of_2(6);
+  let p100100100100100100100100100100011011011011011011011011011011 =
+    p100100100100100100100100000000011011011011011011011011000000
+      * p100100000000000000000000000000011011;
+  let p1001001001001001001001001001000110110110110110110110110110110000 =
+    p100100100100100100100100100100011011011011011011011011011011.exp_power_of_2(4);
+
+  p1001001001001001001001001001000110110110110110110110110110110000 * p111
+}
diff --git a/field/src/extension/binomial_extension.rs b/field/src/extension/binomial_extension.rs
new file mode 100644
index 00000000..a7b84b50
--- /dev/null
+++ b/field/src/extension/binomial_extension.rs
@@ -0,0 +1,512 @@
+use alloc::{format, string::ToString};
+use core::{
+  array,
+  fmt::{self, Debug, Display, Formatter},
+  iter::{Product, Sum},
+  ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign},
+};
+
+use itertools::Itertools;
+use num_bigint::BigUint;
+use rand::{distributions::Standard, prelude::Distribution};
+use serde::{Deserialize, Serialize};
+
+use super::{HasFrobenius, HasTwoAdicBionmialExtension};
+use crate::{
+  extension::BinomiallyExtendable, field::Field, field_to_array, AbstractExtensionField,
+  AbstractField, ExtensionField, Packable, TwoAdicField,
+};
+
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)]
+pub struct BinomialExtensionField<AF, const D: usize> {
+  #[serde(
+    with = "p3_util::array_serialization",
+    bound(serialize = "AF: Serialize", deserialize = "AF: Deserialize<'de>")
+  )]
+  pub(crate) value: [AF; D],
+}
+
+impl<AF: AbstractField, const D: usize> Default for BinomialExtensionField<AF, D> {
+  fn default() -> Self { Self { value: array::from_fn(|_| AF::zero()) } }
+}
+
+impl<AF: AbstractField, const D: usize> From<AF> for BinomialExtensionField<AF, D> {
+  fn from(x: AF) -> Self { Self { value: field_to_array::<AF, D>(x) } }
+}
+
+impl<F: BinomiallyExtendable<D>, const D: usize> Packable for BinomialExtensionField<F, D> {}
+
+impl<F: BinomiallyExtendable<D>, const D: usize> ExtensionField<F>
+  for BinomialExtensionField<F, D>
+{
+  type ExtensionPacking = BinomialExtensionField<F::Packing, D>;
+}
+
+impl<F: BinomiallyExtendable<D>, const D: usize> HasFrobenius<F> for BinomialExtensionField<F, D> {
+  /// FrobeniusField automorphisms: x -> x^n, where n is the order of BaseField.
+  fn frobenius(&self) -> Self { self.repeated_frobenius(1) }
+
+  /// Repeated Frobenius automorphisms: x -> x^(n^count).
+  ///
+  /// Follows precomputation suggestion in Section 11.3.3 of the
+  /// Handbook of Elliptic and Hyperelliptic Curve Cryptography.
+  fn repeated_frobenius(&self, count: usize) -> Self {
+    if count == 0 {
+      return *self;
+    } else if count >= D {
+      // x |-> x^(n^D) is the identity, so x^(n^count) ==
+      // x^(n^(count % D))
+      return self.repeated_frobenius(count % D);
+    }
+    let arr: &[F] = self.as_base_slice();
+
+    // z0 = DTH_ROOT^count = W^(k * count) where k = floor((n-1)/D)
+    let mut z0 = F::dth_root();
+    for _ in 1..count {
+      z0 *= F::dth_root();
+    }
+
+    let mut res = [F::zero(); D];
+    for (i, z) in z0.powers().take(D).enumerate() {
+      res[i] = arr[i] * z;
+    }
+
+    Self::from_base_slice(&res)
+  }
+
+  /// Algorithm 11.3.4 in Handbook of Elliptic and Hyperelliptic Curve Cryptography.
+  fn frobenius_inv(&self) -> Self {
+    // Writing 'a' for self, we need to compute a^(r-1):
+    // r = n^D-1/n-1 = n^(D-1)+n^(D-2)+...+n
+    let mut f = Self::one();
+    for _ in 1..D {
+      f = (f * *self).frobenius();
+    }
+
+    // g = a^r is in the base field, so only compute that
+    // coefficient rather than the full product.
+    let a = self.value;
+    let b = f.value;
+    let mut g = F::zero();
+    for i in 1..D {
+      g += a[i] * b[D - i];
+    }
+    g *= F::w();
+    g += a[0] * b[0];
+    debug_assert_eq!(Self::from(g), *self * f);
+
+    f * g.inverse()
+  }
+}
+
+impl<AF, const D: usize> AbstractField for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type F = BinomialExtensionField<AF::F, D>;
+
+  fn zero() -> Self { Self { value: field_to_array::<AF, D>(AF::zero()) } }
+
+  fn one() -> Self { Self { value: field_to_array::<AF, D>(AF::one()) } }
+
+  fn two() -> Self { Self { value: field_to_array::<AF, D>(AF::two()) } }
+
+  fn neg_one() -> Self { Self { value: field_to_array::<AF, D>(AF::neg_one()) } }
+
+  fn from_f(f: Self::F) -> Self { Self { value: f.value.map(AF::from_f) } }
+
+  fn from_bool(b: bool) -> Self { AF::from_bool(b).into() }
+
+  fn from_canonical_u8(n: u8) -> Self { AF::from_canonical_u8(n).into() }
+
+  fn from_canonical_u16(n: u16) -> Self { AF::from_canonical_u16(n).into() }
+
+  fn from_canonical_u32(n: u32) -> Self { AF::from_canonical_u32(n).into() }
+
+  /// Convert from `u64`. Undefined behavior if the input is outside the canonical range.
+  fn from_canonical_u64(n: u64) -> Self { AF::from_canonical_u64(n).into() }
+
+  /// Convert from `usize`. Undefined behavior if the input is outside the canonical range.
+  fn from_canonical_usize(n: usize) -> Self { AF::from_canonical_usize(n).into() }
+
+  fn from_wrapped_u32(n: u32) -> Self { AF::from_wrapped_u32(n).into() }
+
+  fn from_wrapped_u64(n: u64) -> Self { AF::from_wrapped_u64(n).into() }
+
+  fn generator() -> Self { Self { value: AF::F::ext_generator().map(AF::from_f) } }
+
+  #[inline(always)]
+  fn square(&self) -> Self {
+    match D {
+      2 => {
+        let a = self.value.clone();
+        let mut res = Self::default();
+        res.value[0] = a[0].square() + a[1].square() * AF::from_f(AF::F::w());
+        res.value[1] = a[0].clone() * a[1].double();
+        res
+      },
+      3 => Self { value: cubic_square(&self.value, AF::F::w()).to_vec().try_into().unwrap() },
+      _ => <Self as Mul<Self>>::mul(self.clone(), self.clone()),
+    }
+  }
+}
+
+impl<F: BinomiallyExtendable<D>, const D: usize> Field for BinomialExtensionField<F, D> {
+  type Packing = Self;
+
+  fn try_inverse(&self) -> Option<Self> {
+    if self.is_zero() {
+      return None;
+    }
+
+    match D {
+      2 => Some(Self::from_base_slice(&qudratic_inv(&self.value, F::w()))),
+      3 => Some(Self::from_base_slice(&cubic_inv(&self.value, F::w()))),
+      _ => Some(self.frobenius_inv()),
+    }
+  }
+
+  fn halve(&self) -> Self { Self { value: self.value.map(|x| x.halve()) } }
+
+  fn order() -> BigUint { F::order().pow(D as u32) }
+}
+
+impl<F, const D: usize> Display for BinomialExtensionField<F, D>
+where F: BinomiallyExtendable<D>
+{
+  fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+    if self.is_zero() {
+      write!(f, "0")
+    } else {
+      let str = self
+        .value
+        .iter()
+        .enumerate()
+        .filter(|(_, x)| !x.is_zero())
+        .map(|(i, x)| match (i, x.is_one()) {
+          (0, _) => format!("{x}"),
+          (1, true) => "X".to_string(),
+          (1, false) => format!("{x} X"),
+          (_, true) => format!("X^{i}"),
+          (_, false) => format!("{x} X^{i}"),
+        })
+        .join(" + ");
+      write!(f, "{}", str)
+    }
+  }
+}
+
+impl<AF, const D: usize> Neg for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn neg(self) -> Self { Self { value: self.value.map(AF::neg) } }
+}
+
+impl<AF, const D: usize> Add for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn add(self, rhs: Self) -> Self {
+    let mut res = self.value;
+    for (r, rhs_val) in res.iter_mut().zip(rhs.value) {
+      *r += rhs_val;
+    }
+    Self { value: res }
+  }
+}
+
+impl<AF, const D: usize> Add<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn add(self, rhs: AF) -> Self {
+    let mut res = self.value;
+    res[0] += rhs;
+    Self { value: res }
+  }
+}
+
+impl<AF, const D: usize> AddAssign for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  fn add_assign(&mut self, rhs: Self) { *self = self.clone() + rhs; }
+}
+
+impl<AF, const D: usize> AddAssign<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  fn add_assign(&mut self, rhs: AF) { *self = self.clone() + rhs; }
+}
+
+impl<AF, const D: usize> Sum for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+    let zero = Self { value: field_to_array::<AF, D>(AF::zero()) };
+    iter.fold(zero, |acc, x| acc + x)
+  }
+}
+
+impl<AF, const D: usize> Sub for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn sub(self, rhs: Self) -> Self {
+    let mut res = self.value;
+    for (r, rhs_val) in res.iter_mut().zip(rhs.value) {
+      *r -= rhs_val;
+    }
+    Self { value: res }
+  }
+}
+
+impl<AF, const D: usize> Sub<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn sub(self, rhs: AF) -> Self {
+    let mut res = self.value;
+    res[0] -= rhs;
+    Self { value: res }
+  }
+}
+
+impl<AF, const D: usize> SubAssign for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  #[inline]
+  fn sub_assign(&mut self, rhs: Self) { *self = self.clone() - rhs; }
+}
+
+impl<AF, const D: usize> SubAssign<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  #[inline]
+  fn sub_assign(&mut self, rhs: AF) { *self = self.clone() - rhs; }
+}
+
+impl<AF, const D: usize> Mul for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn mul(self, rhs: Self) -> Self {
+    let a = self.value;
+    let b = rhs.value;
+    let w = AF::F::w();
+    let w_af = AF::from_f(w);
+
+    match D {
+      2 => {
+        let mut res = Self::default();
+        res.value[0] = a[0].clone() * b[0].clone() + a[1].clone() * w_af * b[1].clone();
+        res.value[1] = a[0].clone() * b[1].clone() + a[1].clone() * b[0].clone();
+        res
+      },
+      3 => Self { value: cubic_mul(&a, &b, w).to_vec().try_into().unwrap() },
+      _ => {
+        let mut res = Self::default();
+        #[allow(clippy::needless_range_loop)]
+        for i in 0..D {
+          for j in 0..D {
+            if i + j >= D {
+              res.value[i + j - D] += a[i].clone() * w_af.clone() * b[j].clone();
+            } else {
+              res.value[i + j] += a[i].clone() * b[j].clone();
+            }
+          }
+        }
+        res
+      },
+    }
+  }
+}
+
+impl<AF, const D: usize> Mul<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  type Output = Self;
+
+  #[inline]
+  fn mul(self, rhs: AF) -> Self { Self { value: self.value.map(|x| x * rhs.clone()) } }
+}
+
+impl<AF, const D: usize> Product for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
+    let one = Self { value: field_to_array::<AF, D>(AF::one()) };
+    iter.fold(one, |acc, x| acc * x)
+  }
+}
+
+impl<F, const D: usize> Div for BinomialExtensionField<F, D>
+where F: BinomiallyExtendable<D>
+{
+  type Output = Self;
+
+  #[allow(clippy::suspicious_arithmetic_impl)]
+  fn div(self, rhs: Self) -> Self::Output { self * rhs.inverse() }
+}
+
+impl<F, const D: usize> DivAssign for BinomialExtensionField<F, D>
+where F: BinomiallyExtendable<D>
+{
+  fn div_assign(&mut self, rhs: Self) { *self = *self / rhs; }
+}
+
+impl<AF, const D: usize> MulAssign for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  #[inline]
+  fn mul_assign(&mut self, rhs: Self) { *self = self.clone() * rhs; }
+}
+
+impl<AF, const D: usize> MulAssign<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  fn mul_assign(&mut self, rhs: AF) { *self = self.clone() * rhs; }
+}
+
+impl<AF, const D: usize> AbstractExtensionField<AF> for BinomialExtensionField<AF, D>
+where
+  AF: AbstractField,
+  AF::F: BinomiallyExtendable<D>,
+{
+  const D: usize = D;
+
+  fn from_base(b: AF) -> Self { Self { value: field_to_array(b) } }
+
+  fn from_base_slice(bs: &[AF]) -> Self {
+    Self { value: bs.to_vec().try_into().expect("slice has wrong length") }
+  }
+
+  #[inline]
+  fn from_base_fn<F: FnMut(usize) -> AF>(f: F) -> Self { Self { value: array::from_fn(f) } }
+
+  fn as_base_slice(&self) -> &[AF] { &self.value }
+}
+
+impl<F: BinomiallyExtendable<D>, const D: usize> Distribution<BinomialExtensionField<F, D>>
+  for Standard
+where Standard: Distribution<F>
+{
+  fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> BinomialExtensionField<F, D> {
+    let mut res = [F::zero(); D];
+    for r in res.iter_mut() {
+      *r = Standard.sample(rng);
+    }
+    BinomialExtensionField::<F, D>::from_base_slice(&res)
+  }
+}
+
+impl<F: Field + HasTwoAdicBionmialExtension<D>, const D: usize> TwoAdicField
+  for BinomialExtensionField<F, D>
+{
+  const TWO_ADICITY: usize = F::EXT_TWO_ADICITY;
+
+  fn two_adic_generator(bits: usize) -> Self { Self { value: F::ext_two_adic_generator(bits) } }
+}
+
+/// Section 11.3.6b in Handbook of Elliptic and Hyperelliptic Curve Cryptography.
+#[inline]
+fn qudratic_inv<F: Field>(a: &[F], w: F) -> [F; 2] {
+  let scalar = (a[0].square() - w * a[1].square()).inverse();
+  [a[0] * scalar, -a[1] * scalar]
+}
+
+/// Section 11.3.6b in Handbook of Elliptic and Hyperelliptic Curve Cryptography.
+#[inline]
+fn cubic_inv<F: Field>(a: &[F], w: F) -> [F; 3] {
+  let a0_square = a[0].square();
+  let a1_square = a[1].square();
+  let a2_w = w * a[2];
+  let a0_a1 = a[0] * a[1];
+
+  // scalar = (a0^3+wa1^3+w^2a2^3-3wa0a1a2)^-1
+  let scalar = (a0_square * a[0] + w * a[1] * a1_square + a2_w.square() * a[2]
+    - (F::one() + F::two()) * a2_w * a0_a1)
+    .inverse();
+
+  // scalar*[a0^2-wa1a2, wa2^2-a0a1, a1^2-a0a2]
+  [
+    scalar * (a0_square - a[1] * a2_w),
+    scalar * (a2_w * a[2] - a0_a1),
+    scalar * (a1_square - a[0] * a[2]),
+  ]
+}
+
+/// karatsuba multiplication for cubic extension field
+#[inline]
+fn cubic_mul<AF: AbstractField>(a: &[AF], b: &[AF], w: AF::F) -> [AF; 3] {
+  let a0_b0 = a[0].clone() * b[0].clone();
+  let a1_b1 = a[1].clone() * b[1].clone();
+  let a2_b2 = a[2].clone() * b[2].clone();
+
+  let c0 = a0_b0.clone()
+    + ((a[1].clone() + a[2].clone()) * (b[1].clone() + b[2].clone())
+      - a1_b1.clone()
+      - a2_b2.clone())
+      * AF::from_f(w);
+  let c1 =
+    (a[0].clone() + a[1].clone()) * (b[0].clone() + b[1].clone()) - a0_b0.clone() - a1_b1.clone()
+      + a2_b2.clone() * AF::from_f(w);
+  let c2 = (a[0].clone() + a[2].clone()) * (b[0].clone() + b[2].clone()) - a0_b0 - a2_b2 + a1_b1;
+
+  [c0, c1, c2]
+}
+
+/// Section 11.3.6a in Handbook of Elliptic and Hyperelliptic Curve Cryptography.
+#[inline]
+fn cubic_square<AF: AbstractField>(a: &[AF], w: AF::F) -> [AF; 3] {
+  let w_a2 = a[2].clone() * AF::from_f(w);
+
+  let c0 = a[0].square() + (a[1].clone() * w_a2.clone()).double();
+  let c1 = w_a2 * a[2].clone() + (a[0].clone() * a[1].clone()).double();
+  let c2 = a[1].square() + (a[0].clone() * a[2].clone()).double();
+
+  [c0, c1, c2]
+}
diff --git a/field/src/extension/complex.rs b/field/src/extension/complex.rs
new file mode 100644
index 00000000..743f43da
--- /dev/null
+++ b/field/src/extension/complex.rs
@@ -0,0 +1,87 @@
+use super::{BinomialExtensionField, BinomiallyExtendable, HasTwoAdicBionmialExtension};
+use crate::{AbstractExtensionField, AbstractField, Field};
+
+pub type Complex<AF> = BinomialExtensionField<AF, 2>;
+
+/// A field for which `p = 3 (mod 4)`. Equivalently, `-1` is not a square,
+/// so the complex extension can be defined `F[X]/(X^2+1)`.
+pub trait ComplexExtendable: Field {
+  /// The two-adicity of `p+1`, the order of the circle group.
+  const CIRCLE_TWO_ADICITY: usize;
+
+  fn complex_generator() -> Complex<Self>;
+
+  fn circle_two_adic_generator(bits: usize) -> Complex<Self>;
+}
+
+impl<F: ComplexExtendable> BinomiallyExtendable<2> for F {
+  fn w() -> Self { F::neg_one() }
+
+  fn dth_root() -> Self {
+    // since `p = 3 (mod 4)`, `(p-1)/2` is always odd,
+    // so `(-1)^((p-1)/2) = -1`
+    F::neg_one()
+  }
+
+  fn ext_generator() -> [Self; 2] { F::complex_generator().value }
+}
+
+/// Convenience methods for complex extensions
+impl<AF: AbstractField> Complex<AF> {
+  pub const fn new(real: AF, imag: AF) -> Self { Self { value: [real, imag] } }
+
+  pub fn new_real(real: AF) -> Self { Self::new(real, AF::zero()) }
+
+  pub fn new_imag(imag: AF) -> Self { Self::new(AF::zero(), imag) }
+
+  pub fn real(&self) -> AF { self.value[0].clone() }
+
+  pub fn imag(&self) -> AF { self.value[1].clone() }
+
+  pub fn conjugate(&self) -> Self { Self::new(self.real(), self.imag().neg()) }
+
+  pub fn norm(&self) -> AF { self.real().square() + self.imag().square() }
+
+  pub fn to_array(&self) -> [AF; 2] { self.value.clone() }
+
+  // Sometimes we want to rotate over an extension that's not necessarily ComplexExtendable,
+  // but still on the circle.
+  pub fn rotate<Ext: AbstractExtensionField<AF>>(&self, rhs: Complex<Ext>) -> Complex<Ext> {
+    Complex::<Ext>::new(
+      rhs.real() * self.real() - rhs.imag() * self.imag(),
+      rhs.imag() * self.real() + rhs.real() * self.imag(),
+    )
+  }
+}
+
+/// The complex extension of this field has a binomial extension.
+pub trait HasComplexBinomialExtension<const D: usize>: ComplexExtendable {
+  fn w() -> Complex<Self>;
+  fn dth_root() -> Complex<Self>;
+  fn ext_generator() -> [Complex<Self>; D];
+}
+
+impl<F, const D: usize> BinomiallyExtendable<D> for Complex<F>
+where F: HasComplexBinomialExtension<D>
+{
+  fn w() -> Self { <F as HasComplexBinomialExtension<D>>::w() }
+
+  fn dth_root() -> Self { <F as HasComplexBinomialExtension<D>>::dth_root() }
+
+  fn ext_generator() -> [Self; D] { <F as HasComplexBinomialExtension<D>>::ext_generator() }
+}
+
+/// The complex extension of this field has a two-adic binomial extension.
+pub trait HasTwoAdicComplexBinomialExtension<const D: usize>:
+  HasComplexBinomialExtension<D> {
+  const COMPLEX_EXT_TWO_ADICITY: usize;
+  fn complex_ext_two_adic_generator(bits: usize) -> [Complex<Self>; D];
+}
+
+impl<F, const D: usize> HasTwoAdicBionmialExtension<D> for Complex<F>
+where F: HasTwoAdicComplexBinomialExtension<D>
+{
+  const EXT_TWO_ADICITY: usize = F::COMPLEX_EXT_TWO_ADICITY;
+
+  fn ext_two_adic_generator(bits: usize) -> [Self; D] { F::complex_ext_two_adic_generator(bits) }
+}
diff --git a/field/src/extension/mod.rs b/field/src/extension/mod.rs
new file mode 100644
index 00000000..ea1c165e
--- /dev/null
+++ b/field/src/extension/mod.rs
@@ -0,0 +1,56 @@
+use core::{debug_assert, debug_assert_eq, iter};
+
+use crate::{field::Field, naive_poly_mul, ExtensionField};
+
+mod binomial_extension;
+mod complex;
+
+use alloc::{vec, vec::Vec};
+
+pub use binomial_extension::*;
+pub use complex::*;
+
+/// Binomial extension field trait.
+/// A extension field with a irreducible polynomial X^d-W
+/// such that the extension is `F[X]/(X^d-W)`.
+pub trait BinomiallyExtendable<const D: usize>: Field {
+  fn w() -> Self;
+
+  // DTH_ROOT = W^((n - 1)/D).
+  // n is the order of base field.
+  // Only works when exists k such that n = kD + 1.
+  fn dth_root() -> Self;
+
+  fn ext_generator() -> [Self; D];
+}
+
+pub trait HasFrobenius<F: Field>: ExtensionField<F> {
+  fn frobenius(&self) -> Self;
+  fn repeated_frobenius(&self, count: usize) -> Self;
+  fn frobenius_inv(&self) -> Self;
+
+  fn minimal_poly(mut self) -> Vec<F> {
+    let mut m = vec![Self::one()];
+    for _ in 0..Self::D {
+      m = naive_poly_mul(&m, &[-self, Self::one()]);
+      self = self.frobenius();
+    }
+    let mut m_iter = m.into_iter().map(|c| c.as_base().expect("Extension is not algebraic?"));
+    let m: Vec<F> = m_iter.by_ref().take(Self::D + 1).collect();
+    debug_assert_eq!(m.len(), Self::D + 1);
+    debug_assert_eq!(m.last(), Some(&F::one()));
+    debug_assert!(m_iter.all(|c| c.is_zero()));
+    m
+  }
+
+  fn galois_group(self) -> Vec<Self> {
+    iter::successors(Some(self), |x| Some(x.frobenius())).take(Self::D).collect()
+  }
+}
+
+/// Optional trait for implementing Two Adic Binomial Extension Field.
+pub trait HasTwoAdicBionmialExtension<const D: usize>: BinomiallyExtendable<D> {
+  const EXT_TWO_ADICITY: usize;
+
+  fn ext_two_adic_generator(bits: usize) -> [Self; D];
+}
diff --git a/field/src/field.rs b/field/src/field.rs
new file mode 100644
index 00000000..d755b759
--- /dev/null
+++ b/field/src/field.rs
@@ -0,0 +1,390 @@
+use alloc::vec;
+use core::{
+  fmt::{Debug, Display},
+  hash::Hash,
+  iter::{Product, Sum},
+  ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign},
+  slice,
+};
+
+use itertools::Itertools;
+use num_bigint::BigUint;
+use serde::{de::DeserializeOwned, Serialize};
+
+use crate::{
+  exponentiation::exp_u64_by_squaring,
+  packed::{PackedField, PackedValue},
+  Packable,
+};
+
+/// A generalization of `Field` which permits things like
+/// - an actual field element
+/// - a symbolic expression which would evaluate to a field element
+/// - a vector of field elements
+pub trait AbstractField:
+  Sized
+  + Default
+  + Clone
+  + Add<Output = Self>
+  + AddAssign
+  + Sub<Output = Self>
+  + SubAssign
+  + Neg<Output = Self>
+  + Mul<Output = Self>
+  + MulAssign
+  + Sum
+  + Product
+  + Debug {
+  type F: Field;
+
+  fn zero() -> Self;
+  fn one() -> Self;
+  fn two() -> Self;
+  fn neg_one() -> Self;
+
+  fn from_f(f: Self::F) -> Self;
+  fn from_bool(b: bool) -> Self;
+  fn from_canonical_u8(n: u8) -> Self;
+  fn from_canonical_u16(n: u16) -> Self;
+  fn from_canonical_u32(n: u32) -> Self;
+  fn from_canonical_u64(n: u64) -> Self;
+  fn from_canonical_usize(n: usize) -> Self;
+
+  fn from_wrapped_u32(n: u32) -> Self;
+  fn from_wrapped_u64(n: u64) -> Self;
+
+  /// A generator of this field's entire multiplicative group.
+  fn generator() -> Self;
+
+  #[must_use]
+  fn double(&self) -> Self { self.clone() + self.clone() }
+
+  #[must_use]
+  fn square(&self) -> Self { self.clone() * self.clone() }
+
+  #[must_use]
+  fn cube(&self) -> Self { self.square() * self.clone() }
+
+  /// Exponentiation by a `u64` power.
+  ///
+  /// The default implementation calls `exp_u64_generic`, which by default performs exponentiation
+  /// by squaring. Rather than override this method, it is generally recommended to have the
+  /// concrete field type override `exp_u64_generic`, so that any optimizations will apply to all
+  /// abstract fields.
+  #[must_use]
+  #[inline]
+  fn exp_u64(&self, power: u64) -> Self { Self::F::exp_u64_generic(self.clone(), power) }
+
+  #[must_use]
+  #[inline(always)]
+  fn exp_const_u64<const POWER: u64>(&self) -> Self {
+    match POWER {
+      0 => Self::one(),
+      1 => self.clone(),
+      2 => self.square(),
+      3 => self.cube(),
+      4 => self.square().square(),
+      5 => self.square().square() * self.clone(),
+      6 => self.square().cube(),
+      7 => {
+        let x2 = self.square();
+        let x3 = x2.clone() * self.clone();
+        let x4 = x2.square();
+        x3 * x4
+      },
+      _ => self.exp_u64(POWER),
+    }
+  }
+
+  #[must_use]
+  fn exp_power_of_2(&self, power_log: usize) -> Self {
+    let mut res = self.clone();
+    for _ in 0..power_log {
+      res = res.square();
+    }
+    res
+  }
+
+  #[must_use]
+  fn powers(&self) -> Powers<Self> { self.shifted_powers(Self::one()) }
+
+  fn shifted_powers(&self, start: Self) -> Powers<Self> {
+    Powers { base: self.clone(), current: start }
+  }
+
+  fn powers_packed<P: PackedField<Scalar = Self>>(&self) -> PackedPowers<Self, P> {
+    self.shifted_powers_packed(Self::one())
+  }
+
+  fn shifted_powers_packed<P: PackedField<Scalar = Self>>(
+    &self,
+    start: Self,
+  ) -> PackedPowers<Self, P> {
+    let mut current = P::from_f(start);
+    let slice = current.as_slice_mut();
+    for i in 1..P::WIDTH {
+      slice[i] = slice[i - 1].clone() * self.clone();
+    }
+
+    PackedPowers { multiplier: P::from_f(self.clone()).exp_u64(P::WIDTH as u64), current }
+  }
+
+  fn dot_product<const N: usize>(u: &[Self; N], v: &[Self; N]) -> Self {
+    u.iter().zip(v).map(|(x, y)| x.clone() * y.clone()).sum()
+  }
+
+  fn try_div<Rhs>(self, rhs: Rhs) -> Option<<Self as Mul<Rhs>>::Output>
+  where
+    Rhs: Field,
+    Self: Mul<Rhs>, {
+    rhs.try_inverse().map(|inv| self * inv)
+  }
+}
+
+/// An element of a finite field.
+pub trait Field:
+  AbstractField<F = Self>
+  + Packable
+  + 'static
+  + Copy
+  + Div<Self, Output = Self>
+  + Eq
+  + Hash
+  + Send
+  + Sync
+  + Display
+  + Serialize
+  + DeserializeOwned {
+  type Packing: PackedField<Scalar = Self>;
+
+  fn is_zero(&self) -> bool { *self == Self::zero() }
+
+  fn is_one(&self) -> bool { *self == Self::one() }
+
+  /// self * 2^exp
+  #[must_use]
+  #[inline]
+  fn mul_2exp_u64(&self, exp: u64) -> Self { *self * Self::two().exp_u64(exp) }
+
+  /// self / 2^exp
+  #[must_use]
+  #[inline]
+  fn div_2exp_u64(&self, exp: u64) -> Self { *self / Self::two().exp_u64(exp) }
+
+  /// Exponentiation by a `u64` power. This is similar to `exp_u64`, but more general in that it
+  /// can be used with `AbstractField`s, not just this concrete field.
+  ///
+  /// The default implementation uses naive square and multiply. Implementations may want to
+  /// override this and handle certain powers with more optimal addition chains.
+  #[must_use]
+  #[inline]
+  fn exp_u64_generic<AF: AbstractField<F = Self>>(val: AF, power: u64) -> AF {
+    exp_u64_by_squaring(val, power)
+  }
+
+  /// The multiplicative inverse of this field element, if it exists.
+  ///
+  /// NOTE: The inverse of `0` is undefined and will return `None`.
+  #[must_use]
+  fn try_inverse(&self) -> Option<Self>;
+
+  #[must_use]
+  fn inverse(&self) -> Self { self.try_inverse().expect("Tried to invert zero") }
+
+  /// Computes input/2.
+  /// Should be overwritten by most field implementations to use bitshifts.
+  /// Will error if the field characteristic is 2.
+  #[must_use]
+  fn halve(&self) -> Self {
+    let half =
+      Self::two().try_inverse().expect("Cannot divide by 2 in fields with characteristic 2");
+    *self * half
+  }
+
+  fn order() -> BigUint;
+
+  #[inline]
+  fn bits() -> usize { Self::order().bits() as usize }
+}
+
+pub trait PrimeField: Field + Ord {
+  fn as_canonical_biguint(&self) -> BigUint;
+}
+
+/// A prime field of order less than `2^64`.
+pub trait PrimeField64: PrimeField {
+  const ORDER_U64: u64;
+
+  /// Return the representative of `value` that is less than `ORDER_U64`.
+  fn as_canonical_u64(&self) -> u64;
+}
+
+/// A prime field of order less than `2^32`.
+pub trait PrimeField32: PrimeField64 {
+  const ORDER_U32: u32;
+
+  /// Return the representative of `value` that is less than `ORDER_U32`.
+  fn as_canonical_u32(&self) -> u32;
+}
+
+pub trait AbstractExtensionField<Base: AbstractField>:
+  AbstractField
+  + From<Base>
+  + Add<Base, Output = Self>
+  + AddAssign<Base>
+  + Sub<Base, Output = Self>
+  + SubAssign<Base>
+  + Mul<Base, Output = Self>
+  + MulAssign<Base> {
+  const D: usize;
+
+  fn from_base(b: Base) -> Self;
+
+  /// Suppose this field extension is represented by the quotient
+  /// ring B[X]/(f(X)) where B is `Base` and f is an irreducible
+  /// polynomial of degree `D`. This function takes a slice `bs` of
+  /// length at most D, and constructs the field element
+  /// \sum_i bs[i] * X^i.
+  ///
+  /// NB: The value produced by this function fundamentally depends
+  /// on the choice of irreducible polynomial f. Care must be taken
+  /// to ensure portability if these values might ever be passed to
+  /// (or rederived within) another compilation environment where a
+  /// different f might have been used.
+  fn from_base_slice(bs: &[Base]) -> Self;
+
+  /// Similar to `core:array::from_fn`, with the same caveats as
+  /// `from_base_slice`.
+  fn from_base_fn<F: FnMut(usize) -> Base>(f: F) -> Self;
+
+  /// Suppose this field extension is represented by the quotient
+  /// ring B[X]/(f(X)) where B is `Base` and f is an irreducible
+  /// polynomial of degree `D`. This function takes a field element
+  /// \sum_i bs[i] * X^i and returns the coefficients as a slice
+  /// `bs` of length at most D containing, from lowest degree to
+  /// highest.
+  ///
+  /// NB: The value produced by this function fundamentally depends
+  /// on the choice of irreducible polynomial f. Care must be taken
+  /// to ensure portability if these values might ever be passed to
+  /// (or rederived within) another compilation environment where a
+  /// different f might have been used.
+  fn as_base_slice(&self) -> &[Base];
+
+  /// Suppose this field extension is represented by the quotient
+  /// ring B[X]/(f(X)) where B is `Base` and f is an irreducible
+  /// polynomial of degree `D`. This function returns the field
+  /// element `X^exponent` if `exponent < D` and panics otherwise.
+  /// (The fact that f is not known at the point that this function
+  /// is defined prevents implementing exponentiation of higher
+  /// powers since the reduction cannot be performed.)
+  ///
+  /// NB: The value produced by this function fundamentally depends
+  /// on the choice of irreducible polynomial f. Care must be taken
+  /// to ensure portability if these values might ever be passed to
+  /// (or rederived within) another compilation environment where a
+  /// different f might have been used.
+  fn monomial(exponent: usize) -> Self {
+    assert!(exponent < Self::D, "requested monomial of too high degree");
+    let mut vec = vec![Base::zero(); Self::D];
+    vec[exponent] = Base::one();
+    Self::from_base_slice(&vec)
+  }
+}
+
+pub trait ExtensionField<Base: Field>: Field + AbstractExtensionField<Base> {
+  type ExtensionPacking: AbstractExtensionField<Base::Packing, F = Self>
+    + 'static
+    + Copy
+    + Send
+    + Sync;
+
+  fn is_in_basefield(&self) -> bool { self.as_base_slice()[1..].iter().all(Field::is_zero) }
+  fn as_base(&self) -> Option<Base> {
+    if self.is_in_basefield() {
+      Some(self.as_base_slice()[0])
+    } else {
+      None
+    }
+  }
+
+  fn ext_powers_packed(&self) -> impl Iterator<Item = Self::ExtensionPacking> {
+    let powers = self.powers().take(Base::Packing::WIDTH + 1).collect_vec();
+    // Transpose first WIDTH powers
+    let current = Self::ExtensionPacking::from_base_fn(|i| {
+      Base::Packing::from_fn(|j| powers[j].as_base_slice()[i])
+    });
+    // Broadcast self^WIDTH
+    let multiplier = Self::ExtensionPacking::from_base_fn(|i| {
+      Base::Packing::from(powers[Base::Packing::WIDTH].as_base_slice()[i])
+    });
+
+    core::iter::successors(Some(current), move |&current| Some(current * multiplier))
+  }
+}
+
+impl<F: Field> ExtensionField<F> for F {
+  type ExtensionPacking = F::Packing;
+}
+
+impl<AF: AbstractField> AbstractExtensionField<AF> for AF {
+  const D: usize = 1;
+
+  fn from_base(b: AF) -> Self { b }
+
+  fn from_base_slice(bs: &[AF]) -> Self {
+    assert_eq!(bs.len(), 1);
+    bs[0].clone()
+  }
+
+  fn from_base_fn<F: FnMut(usize) -> AF>(mut f: F) -> Self { f(0) }
+
+  fn as_base_slice(&self) -> &[AF] { slice::from_ref(self) }
+}
+
+/// A field which supplies information like the two-adicity of its multiplicative group, and methods
+/// for obtaining two-adic generators.
+pub trait TwoAdicField: Field {
+  /// The number of factors of two in this field's multiplicative group.
+  const TWO_ADICITY: usize;
+
+  /// Returns a generator of the multiplicative group of order `2^bits`.
+  /// Assumes `bits < TWO_ADICITY`, otherwise the result is undefined.
+  #[must_use]
+  fn two_adic_generator(bits: usize) -> Self;
+}
+
+/// An iterator over the powers of a certain base element `b`: `b^0, b^1, b^2, ...`.
+#[derive(Clone, Debug)]
+pub struct Powers<F> {
+  pub base:    F,
+  pub current: F,
+}
+
+impl<AF: AbstractField> Iterator for Powers<AF> {
+  type Item = AF;
+
+  fn next(&mut self) -> Option<AF> {
+    let result = self.current.clone();
+    self.current *= self.base.clone();
+    Some(result)
+  }
+}
+
+/// like `Powers`, but packed into `PackedField` elements
+#[derive(Clone, Debug)]
+pub struct PackedPowers<F, P: PackedField<Scalar = F>> {
+  // base ** P::WIDTH
+  pub multiplier: P,
+  pub current:    P,
+}
+
+impl<AF: AbstractField, P: PackedField<Scalar = AF>> Iterator for PackedPowers<AF, P> {
+  type Item = P;
+
+  fn next(&mut self) -> Option<P> {
+    let result = self.current;
+    self.current *= self.multiplier;
+    Some(result)
+  }
+}
diff --git a/field/src/helpers.rs b/field/src/helpers.rs
new file mode 100644
index 00000000..f4591be8
--- /dev/null
+++ b/field/src/helpers.rs
@@ -0,0 +1,167 @@
+use alloc::{vec, vec::Vec};
+use core::{array, iter::Sum, ops::Mul};
+
+use num_bigint::BigUint;
+
+use crate::{field::Field, AbstractField, PrimeField, PrimeField32, TwoAdicField};
+
+/// Computes `Z_H(x)`, where `Z_H` is the zerofier of a multiplicative subgroup of order `2^log_n`.
+pub fn two_adic_subgroup_zerofier<F: TwoAdicField>(log_n: usize, x: F) -> F {
+  x.exp_power_of_2(log_n) - F::one()
+}
+
+/// Computes `Z_{sH}(x)`, where `Z_{sH}` is the zerofier of the given coset of a multiplicative
+/// subgroup of order `2^log_n`.
+pub fn two_adic_coset_zerofier<F: TwoAdicField>(log_n: usize, shift: F, x: F) -> F {
+  x.exp_power_of_2(log_n) - shift.exp_power_of_2(log_n)
+}
+
+/// Computes a multiplicative subgroup whose order is known in advance.
+pub fn cyclic_subgroup_known_order<F: Field>(
+  generator: F,
+  order: usize,
+) -> impl Iterator<Item = F> + Clone {
+  generator.powers().take(order)
+}
+
+/// Computes a coset of a multiplicative subgroup whose order is known in advance.
+pub fn cyclic_subgroup_coset_known_order<F: Field>(
+  generator: F,
+  shift: F,
+  order: usize,
+) -> impl Iterator<Item = F> + Clone {
+  cyclic_subgroup_known_order(generator, order).map(move |x| x * shift)
+}
+
+#[must_use]
+pub fn add_vecs<F: Field>(v: Vec<F>, w: Vec<F>) -> Vec<F> {
+  assert_eq!(v.len(), w.len());
+  v.into_iter().zip(w).map(|(x, y)| x + y).collect()
+}
+
+pub fn sum_vecs<F: Field, I: Iterator<Item = Vec<F>>>(iter: I) -> Vec<F> {
+  iter.reduce(|v, w| add_vecs(v, w)).expect("sum_vecs: empty iterator")
+}
+
+pub fn scale_vec<F: Field>(s: F, vec: Vec<F>) -> Vec<F> { vec.into_iter().map(|x| s * x).collect() }
+
+/// `x += y * s`, where `s` is a scalar.
+pub fn add_scaled_slice_in_place<F, Y>(x: &mut [F], y: Y, s: F)
+where
+  F: Field,
+  Y: Iterator<Item = F>, {
+  // TODO: Use PackedField
+  x.iter_mut().zip(y).for_each(|(x_i, y_i)| *x_i += y_i * s);
+}
+
+/// Extend a field `AF` element `x` to an array of length `D`
+/// by filling zeros.
+pub fn field_to_array<AF: AbstractField, const D: usize>(x: AF) -> [AF; D] {
+  let mut arr = array::from_fn(|_| AF::zero());
+  arr[0] = x;
+  arr
+}
+
+/// Naive polynomial multiplication.
+pub fn naive_poly_mul<AF: AbstractField>(a: &[AF], b: &[AF]) -> Vec<AF> {
+  // Grade school algorithm
+  let mut product = vec![AF::zero(); a.len() + b.len() - 1];
+  for (i, c1) in a.iter().enumerate() {
+    for (j, c2) in b.iter().enumerate() {
+      product[i + j] += c1.clone() * c2.clone();
+    }
+  }
+  product
+}
+
+/// Expand a product of binomials (x - roots[0])(x - roots[1]).. into polynomial coefficients.
+pub fn binomial_expand<AF: AbstractField>(roots: &[AF]) -> Vec<AF> {
+  let mut coeffs = vec![AF::zero(); roots.len() + 1];
+  coeffs[0] = AF::one();
+  for (i, x) in roots.iter().enumerate() {
+    for j in (1..i + 2).rev() {
+      coeffs[j] = coeffs[j - 1].clone() - x.clone() * coeffs[j].clone();
+    }
+    coeffs[0] *= -x.clone();
+  }
+  coeffs
+}
+
+pub fn eval_poly<AF: AbstractField>(poly: &[AF], x: AF) -> AF {
+  let mut acc = AF::zero();
+  for coeff in poly.iter().rev() {
+    acc *= x.clone();
+    acc += coeff.clone();
+  }
+  acc
+}
+
+/// Given an element x from a 32 bit field F_P compute x/2.
+#[inline]
+pub fn halve_u32<const P: u32>(input: u32) -> u32 {
+  let shift = (P + 1) >> 1;
+  let shr = input >> 1;
+  let lo_bit = input & 1;
+  let shr_corr = shr + shift;
+  if lo_bit == 0 {
+    shr
+  } else {
+    shr_corr
+  }
+}
+
+/// Given an element x from a 64 bit field F_P compute x/2.
+#[inline]
+pub fn halve_u64<const P: u64>(input: u64) -> u64 {
+  let shift = (P + 1) >> 1;
+  let shr = input >> 1;
+  let lo_bit = input & 1;
+  let shr_corr = shr + shift;
+  if lo_bit == 0 {
+    shr
+  } else {
+    shr_corr
+  }
+}
+
+/// Given a slice of SF elements, reduce them to a TF element using a 2^32-base decomposition.
+pub fn reduce_32<SF: PrimeField32, TF: PrimeField>(vals: &[SF]) -> TF {
+  let po2 = TF::from_canonical_u64(1u64 << 32);
+  let mut result = TF::zero();
+  for val in vals.iter().rev() {
+    result = result * po2 + TF::from_canonical_u32(val.as_canonical_u32());
+  }
+  result
+}
+
+/// Given an SF element, split it to a vector of TF elements using a 2^64-base decomposition.
+///
+/// We use a 2^64-base decomposition for a field of size ~2^32 because then the bias will be
+/// at most ~1/2^32 for each element after the reduction.
+pub fn split_32<SF: PrimeField, TF: PrimeField32>(val: SF, n: usize) -> Vec<TF> {
+  let po2 = BigUint::from(1u128 << 64);
+  let mut val = val.as_canonical_biguint();
+  let mut result = Vec::new();
+  for _ in 0..n {
+    let mask: BigUint = po2.clone() - BigUint::from(1u128);
+    let digit: BigUint = val.clone() & mask;
+    let digit_u64s = digit.to_u64_digits();
+    if !digit_u64s.is_empty() {
+      result.push(TF::from_wrapped_u64(digit_u64s[0]));
+    } else {
+      result.push(TF::zero())
+    }
+    val /= po2.clone();
+  }
+  result
+}
+
+/// Maximally generic dot product.
+pub fn dot_product<S, LI, RI>(li: LI, ri: RI) -> S
+where
+  LI: Iterator,
+  RI: Iterator,
+  LI::Item: Mul<RI::Item>,
+  S: Sum<<LI::Item as Mul<RI::Item>>::Output>, {
+  li.zip(ri).map(|(l, r)| l * r).sum()
+}
diff --git a/field/src/lib.rs b/field/src/lib.rs
new file mode 100644
index 00000000..8f483393
--- /dev/null
+++ b/field/src/lib.rs
@@ -0,0 +1,20 @@
+//! A framework for finite fields.
+
+#![no_std]
+
+extern crate alloc;
+
+mod array;
+mod batch_inverse;
+mod exponentiation;
+pub mod extension;
+mod field;
+mod helpers;
+mod packed;
+
+pub use array::*;
+pub use batch_inverse::*;
+pub use exponentiation::*;
+pub use field::*;
+pub use helpers::*;
+pub use packed::*;
diff --git a/field/src/packed.rs b/field/src/packed.rs
new file mode 100644
index 00000000..0ce8551e
--- /dev/null
+++ b/field/src/packed.rs
@@ -0,0 +1,173 @@
+use core::{
+  mem,
+  ops::{Add, AddAssign, Div, Mul, MulAssign, Sub, SubAssign},
+  slice,
+};
+
+use crate::{field::Field, AbstractField};
+
+/// A trait to constrain types that can be packed into a packed value.
+///
+/// The `Packable` trait allows us to specify implementations for potentially conflicting types.
+pub trait Packable: 'static + Default + Copy + Send + Sync + PartialEq + Eq {}
+
+/// # Safety
+/// - `WIDTH` is assumed to be a power of 2.
+/// - If `P` implements `PackedField` then `P` must be castable to/from `[P::Value; P::WIDTH]`
+///   without UB.
+pub unsafe trait PackedValue: 'static + Copy + From<Self::Value> + Send + Sync {
+  type Value: Packable;
+
+  const WIDTH: usize;
+
+  fn from_slice(slice: &[Self::Value]) -> &Self;
+  fn from_slice_mut(slice: &mut [Self::Value]) -> &mut Self;
+
+  /// Similar to `core:array::from_fn`.
+  fn from_fn<F>(f: F) -> Self
+  where F: FnMut(usize) -> Self::Value;
+
+  fn as_slice(&self) -> &[Self::Value];
+  fn as_slice_mut(&mut self) -> &mut [Self::Value];
+
+  fn pack_slice(buf: &[Self::Value]) -> &[Self] {
+    // Sources vary, but this should be true on all platforms we care about.
+    // This should be a const assert, but trait methods can't access `Self` in a const context,
+    // even with inner struct instantiation. So we will trust LLVM to optimize this out.
+    assert!(mem::align_of::<Self>() <= mem::align_of::<Self::Value>());
+    assert!(
+      buf.len() % Self::WIDTH == 0,
+      "Slice length (got {}) must be a multiple of packed field width ({}).",
+      buf.len(),
+      Self::WIDTH
+    );
+    let buf_ptr = buf.as_ptr().cast::<Self>();
+    let n = buf.len() / Self::WIDTH;
+    unsafe { slice::from_raw_parts(buf_ptr, n) }
+  }
+
+  fn pack_slice_with_suffix(buf: &[Self::Value]) -> (&[Self], &[Self::Value]) {
+    let (packed, suffix) = buf.split_at(buf.len() - buf.len() % Self::WIDTH);
+    (Self::pack_slice(packed), suffix)
+  }
+
+  fn pack_slice_mut(buf: &mut [Self::Value]) -> &mut [Self] {
+    assert!(mem::align_of::<Self>() <= mem::align_of::<Self::Value>());
+    assert!(
+      buf.len() % Self::WIDTH == 0,
+      "Slice length (got {}) must be a multiple of packed field width ({}).",
+      buf.len(),
+      Self::WIDTH
+    );
+    let buf_ptr = buf.as_mut_ptr().cast::<Self>();
+    let n = buf.len() / Self::WIDTH;
+    unsafe { slice::from_raw_parts_mut(buf_ptr, n) }
+  }
+
+  fn pack_slice_with_suffix_mut(buf: &mut [Self::Value]) -> (&mut [Self], &mut [Self::Value]) {
+    let (packed, suffix) = buf.split_at_mut(buf.len() - buf.len() % Self::WIDTH);
+    (Self::pack_slice_mut(packed), suffix)
+  }
+
+  fn unpack_slice(buf: &[Self]) -> &[Self::Value] {
+    assert!(mem::align_of::<Self>() >= mem::align_of::<Self::Value>());
+    let buf_ptr = buf.as_ptr().cast::<Self::Value>();
+    let n = buf.len() * Self::WIDTH;
+    unsafe { slice::from_raw_parts(buf_ptr, n) }
+  }
+}
+
+/// # Safety
+/// - See `PackedValue` above.
+pub unsafe trait PackedField: AbstractField<F = Self::Scalar>
+    + PackedValue<Value = Self::Scalar>
+    + From<Self::Scalar>
+    + Add<Self::Scalar, Output = Self>
+    + AddAssign<Self::Scalar>
+    + Sub<Self::Scalar, Output = Self>
+    + SubAssign<Self::Scalar>
+    + Mul<Self::Scalar, Output = Self>
+    + MulAssign<Self::Scalar>
+    // TODO: Implement packed / packed division
+    + Div<Self::Scalar, Output = Self>
+{
+    type Scalar: Field + Add<Self, Output = Self> + Mul<Self, Output = Self> + Sub<Self, Output = Self>;
+
+
+
+    /// Take interpret two vectors as chunks of `block_len` elements. Unpack and interleave those
+    /// chunks. This is best seen with an example. If we have:
+    /// ```text
+    /// A = [x0, y0, x1, y1]
+    /// B = [x2, y2, x3, y3]
+    /// ```
+    ///
+    /// then
+    ///
+    /// ```text
+    /// interleave(A, B, 1) = ([x0, x2, x1, x3], [y0, y2, y1, y3])
+    /// ```
+    ///
+    /// Pairs that were adjacent in the input are at corresponding positions in the output.
+    ///
+    /// `r` lets us set the size of chunks we're interleaving. If we set `block_len = 2`, then for
+    ///
+    /// ```text
+    /// A = [x0, x1, y0, y1]
+    /// B = [x2, x3, y2, y3]
+    /// ```
+    ///
+    /// we obtain
+    ///
+    /// ```text
+    /// interleave(A, B, block_len) = ([x0, x1, x2, x3], [y0, y1, y2, y3])
+    /// ```
+    ///
+    /// We can also think about this as stacking the vectors, dividing them into 2x2 matrices, and
+    /// transposing those matrices.
+    ///
+    /// When `block_len = WIDTH`, this operation is a no-op. `block_len` must divide `WIDTH`. Since
+    /// `WIDTH` is specified to be a power of 2, `block_len` must also be a power of 2. It cannot be
+    /// 0 and it cannot exceed `WIDTH`.
+    fn interleave(&self, other: Self, block_len: usize) -> (Self, Self);
+}
+
+unsafe impl<T: Packable> PackedValue for T {
+  type Value = Self;
+
+  const WIDTH: usize = 1;
+
+  fn from_slice(slice: &[Self::Value]) -> &Self { &slice[0] }
+
+  fn from_slice_mut(slice: &mut [Self::Value]) -> &mut Self { &mut slice[0] }
+
+  fn from_fn<Fn>(mut f: Fn) -> Self
+  where Fn: FnMut(usize) -> Self::Value {
+    f(0)
+  }
+
+  fn as_slice(&self) -> &[Self::Value] { slice::from_ref(self) }
+
+  fn as_slice_mut(&mut self) -> &mut [Self::Value] { slice::from_mut(self) }
+}
+
+unsafe impl<F: Field> PackedField for F {
+  type Scalar = Self;
+
+  fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) {
+    match block_len {
+      1 => (*self, other),
+      _ => panic!("unsupported block length"),
+    }
+  }
+}
+
+impl Packable for u8 {}
+
+impl Packable for u16 {}
+
+impl Packable for u32 {}
+
+impl Packable for u64 {}
+
+impl Packable for u128 {}
diff --git a/APACHE2 b/ronkathon/APACHE2
similarity index 100%
rename from APACHE2
rename to ronkathon/APACHE2
diff --git a/ronkathon/Cargo.toml b/ronkathon/Cargo.toml
new file mode 100644
index 00000000..6443ab10
--- /dev/null
+++ b/ronkathon/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+authors    =["Pluto Authors"]
+description="""ronkathon"""
+edition    ="2021"
+license    ="Apache2.0 OR MIT"
+name       ="ronkathon"
+repository ="https://github.com/thor314/ronkathon"
+version    ="0.1.0"
+
+[dependencies]
+anyhow   ="1.0"
+p3-field = { path = "../field" }
diff --git a/LICENSE-MIT b/ronkathon/LICENSE-MIT
similarity index 100%
rename from LICENSE-MIT
rename to ronkathon/LICENSE-MIT
diff --git a/README.md b/ronkathon/README.md
similarity index 100%
rename from README.md
rename to ronkathon/README.md
diff --git a/rust-toolchain.toml b/ronkathon/rust-toolchain.toml
similarity index 100%
rename from rust-toolchain.toml
rename to ronkathon/rust-toolchain.toml
diff --git a/src/curve.rs b/ronkathon/src/curve.rs
similarity index 100%
rename from src/curve.rs
rename to ronkathon/src/curve.rs
diff --git a/src/lib.rs b/ronkathon/src/lib.rs
similarity index 100%
rename from src/lib.rs
rename to ronkathon/src/lib.rs
diff --git a/util/Cargo.toml b/util/Cargo.toml
new file mode 100644
index 00000000..21152b35
--- /dev/null
+++ b/util/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "p3-util"
+version = "0.1.0"
+edition = "2021"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+serde = { version = "1.0", default-features = false }
diff --git a/util/src/array_serialization.rs b/util/src/array_serialization.rs
new file mode 100644
index 00000000..4754dddf
--- /dev/null
+++ b/util/src/array_serialization.rs
@@ -0,0 +1,53 @@
+use alloc::vec::Vec;
+use core::marker::PhantomData;
+
+use serde::{
+  de::{SeqAccess, Visitor},
+  ser::SerializeTuple,
+  Deserialize, Deserializer, Serialize, Serializer,
+};
+
+pub fn serialize<S: Serializer, T: Serialize, const N: usize>(
+  data: &[T; N],
+  ser: S,
+) -> Result<S::Ok, S::Error> {
+  let mut s = ser.serialize_tuple(N)?;
+  for item in data {
+    s.serialize_element(item)?;
+  }
+  s.end()
+}
+
+struct ArrayVisitor<T, const N: usize>(PhantomData<T>);
+
+impl<'de, T, const N: usize> Visitor<'de> for ArrayVisitor<T, N>
+where T: Deserialize<'de>
+{
+  type Value = [T; N];
+
+  fn expecting(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+    formatter.write_fmt(format_args!("an array of length {}", N))
+  }
+
+  #[inline]
+  fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
+  where A: SeqAccess<'de> {
+    let mut data = Vec::with_capacity(N);
+    for _ in 0..N {
+      match seq.next_element()? {
+        Some(val) => data.push(val),
+        None => return Err(serde::de::Error::invalid_length(N, &self)),
+      }
+    }
+    match data.try_into() {
+      Ok(arr) => Ok(arr),
+      Err(_) => unreachable!(),
+    }
+  }
+}
+pub fn deserialize<'de, D, T, const N: usize>(deserializer: D) -> Result<[T; N], D::Error>
+where
+  D: Deserializer<'de>,
+  T: Deserialize<'de>, {
+  deserializer.deserialize_tuple(N, ArrayVisitor::<T, N>(PhantomData))
+}
diff --git a/util/src/lib.rs b/util/src/lib.rs
new file mode 100644
index 00000000..359a4070
--- /dev/null
+++ b/util/src/lib.rs
@@ -0,0 +1,145 @@
+//! Various simple utilities.
+
+#![no_std]
+
+extern crate alloc;
+
+use alloc::vec::Vec;
+use core::hint::unreachable_unchecked;
+
+pub mod array_serialization;
+pub mod linear_map;
+
+/// Computes `ceil(a / b)`. Assumes `a + b` does not overflow.
+#[must_use]
+pub const fn ceil_div_usize(a: usize, b: usize) -> usize { (a + b - 1) / b }
+
+/// Computes `ceil(log_2(n))`.
+#[must_use]
+pub const fn log2_ceil_usize(n: usize) -> usize {
+  (usize::BITS - n.saturating_sub(1).leading_zeros()) as usize
+}
+
+#[must_use]
+pub fn log2_ceil_u64(n: u64) -> u64 { (u64::BITS - n.saturating_sub(1).leading_zeros()).into() }
+
+/// Computes `log_2(n)`
+///
+/// # Panics
+/// Panics if `n` is not a power of two.
+#[must_use]
+#[inline]
+pub fn log2_strict_usize(n: usize) -> usize {
+  let res = n.trailing_zeros();
+  assert_eq!(n.wrapping_shr(res), 1, "Not a power of two: {n}");
+  res as usize
+}
+
+/// Returns `[0, ..., N - 1]`.
+#[must_use]
+pub const fn indices_arr<const N: usize>() -> [usize; N] {
+  let mut indices_arr = [0; N];
+  let mut i = 0;
+  while i < N {
+    indices_arr[i] = i;
+    i += 1;
+  }
+  indices_arr
+}
+
+#[inline]
+pub const fn reverse_bits(x: usize, n: usize) -> usize {
+  reverse_bits_len(x, n.trailing_zeros() as usize)
+}
+
+#[inline]
+pub const fn reverse_bits_len(x: usize, bit_len: usize) -> usize {
+  // NB: The only reason we need overflowing_shr() here as opposed
+  // to plain '>>' is to accommodate the case n == num_bits == 0,
+  // which would become `0 >> 64`. Rust thinks that any shift of 64
+  // bits causes overflow, even when the argument is zero.
+  x.reverse_bits().overflowing_shr(usize::BITS - bit_len as u32).0
+}
+
+pub fn reverse_slice_index_bits<F>(vals: &mut [F]) {
+  let n = vals.len();
+  if n == 0 {
+    return;
+  }
+  let log_n = log2_strict_usize(n);
+
+  for i in 0..n {
+    let j = reverse_bits_len(i, log_n);
+    if i < j {
+      vals.swap(i, j);
+    }
+  }
+}
+
+#[inline(always)]
+pub fn assume(p: bool) {
+  debug_assert!(p);
+  if !p {
+    unsafe {
+      unreachable_unchecked();
+    }
+  }
+}
+
+/// Try to force Rust to emit a branch. Example:
+///
+/// ```no_run
+/// let x = 100;
+/// if x > 20 {
+///   println!("x is big!");
+///   p3_util::branch_hint();
+/// } else {
+///   println!("x is small!");
+/// }
+/// ```
+///
+/// This function has no semantics. It is a hint only.
+#[inline(always)]
+pub fn branch_hint() {
+  // NOTE: These are the currently supported assembly architectures. See the
+  // [nightly reference](https://doc.rust-lang.org/nightly/reference/inline-assembly.html) for
+  // the most up-to-date list.
+  #[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "riscv32",
+    target_arch = "riscv64",
+    target_arch = "x86",
+    target_arch = "x86_64",
+  ))]
+  unsafe {
+    core::arch::asm!("", options(nomem, nostack, preserves_flags));
+  }
+}
+
+/// Convenience methods for Vec.
+pub trait VecExt<T> {
+  /// Push `elem` and return a reference to it.
+  fn pushed_ref(&mut self, elem: T) -> &T;
+  /// Push `elem` and return a mutable reference to it.
+  fn pushed_mut(&mut self, elem: T) -> &mut T;
+}
+
+impl<T> VecExt<T> for alloc::vec::Vec<T> {
+  fn pushed_ref(&mut self, elem: T) -> &T {
+    self.push(elem);
+    self.last().unwrap()
+  }
+
+  fn pushed_mut(&mut self, elem: T) -> &mut T {
+    self.push(elem);
+    self.last_mut().unwrap()
+  }
+}
+
+pub fn transpose_vec<T>(v: Vec<Vec<T>>) -> Vec<Vec<T>> {
+  assert!(!v.is_empty());
+  let len = v[0].len();
+  let mut iters: Vec<_> = v.into_iter().map(|n| n.into_iter()).collect();
+  (0..len).map(|_| iters.iter_mut().map(|n| n.next().unwrap()).collect::<Vec<T>>()).collect()
+}
diff --git a/util/src/linear_map.rs b/util/src/linear_map.rs
new file mode 100644
index 00000000..6dc521d9
--- /dev/null
+++ b/util/src/linear_map.rs
@@ -0,0 +1,65 @@
+use alloc::vec::Vec;
+use core::mem;
+
+use crate::VecExt;
+
+/// O(n) Vec-backed map for keys that only implement Eq.
+/// Only use this for a very small number of keys, operations
+/// on it can easily become O(n^2).
+#[derive(Debug)]
+pub struct LinearMap<K, V>(Vec<(K, V)>);
+
+impl<K, V> Default for LinearMap<K, V> {
+  fn default() -> Self { Self(Default::default()) }
+}
+
+impl<K: Eq, V> LinearMap<K, V> {
+  pub fn new() -> Self { Default::default() }
+
+  pub fn get(&self, k: &K) -> Option<&V> { self.0.iter().find(|(kk, _)| kk == k).map(|(_, v)| v) }
+
+  pub fn get_mut(&mut self, k: &K) -> Option<&mut V> {
+    self.0.iter_mut().find(|(kk, _)| kk == k).map(|(_, v)| v)
+  }
+
+  /// This is O(n), because we check for an existing entry.
+  pub fn insert(&mut self, k: K, mut v: V) -> Option<V> {
+    if let Some(vv) = self.get_mut(&k) {
+      mem::swap(&mut v, vv);
+      Some(v)
+    } else {
+      self.0.push((k, v));
+      None
+    }
+  }
+
+  pub fn get_or_insert_with(&mut self, k: K, f: impl FnOnce() -> V) -> &mut V {
+    let existing = self.0.iter().position(|(kk, _)| kk == &k);
+    if let Some(idx) = existing {
+      &mut self.0[idx].1
+    } else {
+      let slot = self.0.pushed_mut((k, f()));
+      &mut slot.1
+    }
+  }
+
+  pub fn values(&self) -> impl Iterator<Item = &V> { self.0.iter().map(|(_, v)| v) }
+}
+
+impl<K: Eq, V> FromIterator<(K, V)> for LinearMap<K, V> {
+  /// This calls `insert` in a loop, so is O(n^2)!!
+  fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
+    let mut me = LinearMap::default();
+    for (k, v) in iter {
+      me.insert(k, v);
+    }
+    me
+  }
+}
+
+impl<K, V> IntoIterator for LinearMap<K, V> {
+  type IntoIter = <Vec<(K, V)> as IntoIterator>::IntoIter;
+  type Item = (K, V);
+
+  fn into_iter(self) -> Self::IntoIter { self.0.into_iter() }
+}