Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plonky3 deps added #4

Merged
merged 2 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
[package]
authors =["Pluto Authors"]
description="""ronkathon"""
edition ="2021"
license ="Apache2.0 OR MIT"
name ="ronkathon"
repository ="https://github.com/thor314/ronkathon"
version ="0.1.0"
[workspace]
resolver = "2"

[dependencies]
anyhow ="1.0"
members = [
"ronkathon",
"field",
"util"
]
14 changes: 14 additions & 0 deletions field/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "p3-field"
version = "0.1.0"
edition = "2021"
license = "MIT OR Apache-2.0"

[dependencies]
p3-util = { path = "../util" }
num-bigint = { version = "0.4.3", default-features = false }
num-traits = { version = "0.2.18", default-features = false }

itertools = "0.12.0"
rand = "0.8.5"
serde = { version = "1.0", default-features = false, features = ["derive"] }
148 changes: 148 additions & 0 deletions field/src/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
use core::{
array,
iter::{Product, Sum},
ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
};

use crate::{AbstractField, Field};

/// A newtype around `[F; N]`: `N` field elements handled as one value.
///
/// The operator impls in this module act lane by lane; the `F`-typed right-hand-side variants
/// apply the same scalar to every lane.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct FieldArray<F: Field, const N: usize>(pub [F; N]);

impl<F: Field, const N: usize> Default for FieldArray<F, N> {
fn default() -> Self { Self::zero() }
}

impl<F: Field, const N: usize> From<F> for FieldArray<F, N> {
    /// Broadcasts one element into all `N` lanes.
    fn from(val: F) -> Self {
        Self([val; N])
    }
}

impl<F: Field, const N: usize> From<[F; N]> for FieldArray<F, N> {
fn from(arr: [F; N]) -> Self { Self(arr) }
}

/// Every constructor below builds the corresponding value of `F` once and copies it into all
/// `N` lanes.
impl<F: Field, const N: usize> AbstractField for FieldArray<F, N> {
    type F = F;

    /// All lanes hold `F::zero()`.
    fn zero() -> Self {
        Self([F::zero(); N])
    }

    /// All lanes hold `F::one()`.
    fn one() -> Self {
        Self([F::one(); N])
    }

    /// All lanes hold `F::two()`.
    fn two() -> Self {
        Self([F::two(); N])
    }

    /// All lanes hold `F::neg_one()`.
    fn neg_one() -> Self {
        Self([F::neg_one(); N])
    }

    /// Broadcasts the given base-field element to all lanes.
    #[inline]
    fn from_f(f: Self::F) -> Self {
        Self([f; N])
    }

    /// Broadcasts `F::from_bool(b)`.
    fn from_bool(b: bool) -> Self {
        Self([F::from_bool(b); N])
    }

    /// Broadcasts `F::from_canonical_u8(n)`.
    fn from_canonical_u8(n: u8) -> Self {
        Self([F::from_canonical_u8(n); N])
    }

    /// Broadcasts `F::from_canonical_u16(n)`.
    fn from_canonical_u16(n: u16) -> Self {
        Self([F::from_canonical_u16(n); N])
    }

    /// Broadcasts `F::from_canonical_u32(n)`.
    fn from_canonical_u32(n: u32) -> Self {
        Self([F::from_canonical_u32(n); N])
    }

    /// Broadcasts `F::from_canonical_u64(n)`.
    fn from_canonical_u64(n: u64) -> Self {
        Self([F::from_canonical_u64(n); N])
    }

    /// Broadcasts `F::from_canonical_usize(n)`.
    fn from_canonical_usize(n: usize) -> Self {
        Self([F::from_canonical_usize(n); N])
    }

    /// Broadcasts `F::from_wrapped_u32(n)`.
    fn from_wrapped_u32(n: u32) -> Self {
        Self([F::from_wrapped_u32(n); N])
    }

    /// Broadcasts `F::from_wrapped_u64(n)`.
    fn from_wrapped_u64(n: u64) -> Self {
        Self([F::from_wrapped_u64(n); N])
    }

    /// Broadcasts `F::generator()`.
    fn generator() -> Self {
        Self([F::generator(); N])
    }
}

impl<F: Field, const N: usize> Add for FieldArray<F, N> {
type Output = Self;

#[inline]
fn add(self, rhs: Self) -> Self::Output { array::from_fn(|i| self.0[i] + rhs.0[i]).into() }
}

impl<F: Field, const N: usize> Add<F> for FieldArray<F, N> {
type Output = Self;

#[inline]
fn add(self, rhs: F) -> Self::Output { self.0.map(|x| x + rhs).into() }
}

impl<F: Field, const N: usize> AddAssign for FieldArray<F, N> {
    /// In-place lane-wise addition of `rhs`.
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        for (lane, addend) in self.0.iter_mut().zip(rhs.0) {
            *lane += addend;
        }
    }
}

impl<F: Field, const N: usize> AddAssign<F> for FieldArray<F, N> {
    /// Adds the scalar `rhs` to every lane in place.
    #[inline]
    fn add_assign(&mut self, rhs: F) {
        for lane in self.0.iter_mut() {
            *lane += rhs;
        }
    }
}

impl<F: Field, const N: usize> Sub for FieldArray<F, N> {
type Output = Self;

#[inline]
fn sub(self, rhs: Self) -> Self::Output { array::from_fn(|i| self.0[i] - rhs.0[i]).into() }
}

impl<F: Field, const N: usize> Sub<F> for FieldArray<F, N> {
type Output = Self;

#[inline]
fn sub(self, rhs: F) -> Self::Output { self.0.map(|x| x - rhs).into() }
}

impl<F: Field, const N: usize> SubAssign for FieldArray<F, N> {
    /// In-place lane-wise subtraction of `rhs`.
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        for (lane, subtrahend) in self.0.iter_mut().zip(rhs.0) {
            *lane -= subtrahend;
        }
    }
}

impl<F: Field, const N: usize> SubAssign<F> for FieldArray<F, N> {
    /// Subtracts the scalar `rhs` from every lane in place.
    #[inline]
    fn sub_assign(&mut self, rhs: F) {
        for lane in self.0.iter_mut() {
            *lane -= rhs;
        }
    }
}

impl<F: Field, const N: usize> Neg for FieldArray<F, N> {
type Output = Self;

#[inline]
fn neg(self) -> Self::Output { self.0.map(|x| -x).into() }
}

impl<F: Field, const N: usize> Mul for FieldArray<F, N> {
type Output = Self;

#[inline]
fn mul(self, rhs: Self) -> Self::Output { array::from_fn(|i| self.0[i] * rhs.0[i]).into() }
}

impl<F: Field, const N: usize> Mul<F> for FieldArray<F, N> {
type Output = Self;

#[inline]
fn mul(self, rhs: F) -> Self::Output { self.0.map(|x| x * rhs).into() }
}

impl<F: Field, const N: usize> MulAssign for FieldArray<F, N> {
    /// In-place lane-wise multiplication by `rhs`.
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        for (lane, factor) in self.0.iter_mut().zip(rhs.0) {
            *lane *= factor;
        }
    }
}

impl<F: Field, const N: usize> MulAssign<F> for FieldArray<F, N> {
    /// Scales every lane by the scalar `rhs` in place.
    #[inline]
    fn mul_assign(&mut self, rhs: F) {
        for lane in self.0.iter_mut() {
            *lane *= rhs;
        }
    }
}

impl<F: Field, const N: usize> Sum for FieldArray<F, N> {
    /// Adds up all items lane by lane.
    ///
    /// An empty iterator yields `Self::zero()`. The identity is only constructed in that case;
    /// a non-empty iterator is folded directly, without an extra `zero + first` addition.
    #[inline]
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        iter.reduce(|lhs, rhs| lhs + rhs).unwrap_or_else(Self::zero)
    }
}

impl<F: Field, const N: usize> Product for FieldArray<F, N> {
    /// Multiplies all items lane by lane.
    ///
    /// An empty iterator yields `Self::one()`. The identity is only constructed in that case;
    /// a non-empty iterator is folded directly, without an extra `one * first` multiplication.
    #[inline]
    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
        iter.reduce(|lhs, rhs| lhs * rhs).unwrap_or_else(Self::one)
    }
}
93 changes: 93 additions & 0 deletions field/src/batch_inverse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use alloc::{vec, vec::Vec};

use crate::field::Field;

/// Inverts every element of `x` using Montgomery's trick.
///
/// At a high level, the product of the inputs is inverted once, and the individual inverses are
/// then recovered from that single inverse by multiplications only.
///
/// The textbook version keeps one array of cumulative products, which forms a long dependency
/// chain. To expose more instruction-level parallelism, this implementation keeps `WIDTH`
/// independent cumulative-product chains that only meet at the single inversion.
///
/// Returns a vector `r` of the same length as `x` with `r[i] * x[i] == F::one()`.
///
/// # Panics
/// Might panic if asserts or unwraps uncover a bug.
// NOTE(review): a zero input has no inverse; what happens then depends on `Field::inverse`,
// which is not visible here — confirm.
pub fn batch_multiplicative_inverse<F: Field>(x: &[F]) -> Vec<F> {
    // A larger WIDTH gives more instruction-level parallelism, but too large a value runs out of
    // registers. The code below is written specifically for WIDTH == 4 and has to be adapted by
    // hand if this constant changes.
    const WIDTH: usize = 4;

    let n = x.len();

    // Inputs shorter than WIDTH are handled directly; each case needs exactly one inversion.
    match *x {
        [] => return Vec::new(),
        [a] => return vec![a.inverse()],
        [a, b] => {
            let ab = a * b;
            let ab_inv = ab.inverse();
            return vec![ab_inv * b, ab_inv * a];
        }
        [a, b, c] => {
            let ab = a * b;
            let abc = ab * c;
            let abc_inv = abc.inverse();
            let ab_inv = abc_inv * c;
            return vec![ab_inv * b, ab_inv * a, abc_inv * ab];
        }
        _ => {}
    }
    debug_assert!(n >= WIDTH);

    // `out` is first filled with strided cumulative products and later overwritten with the
    // inverses, so only one allocation is needed. After the forward pass it holds
    //   x[0],        x[1],        x[2],        x[3],
    //   x[0] * x[4], x[1] * x[5], x[2] * x[6], x[3] * x[7],
    //   ...
    // and simply stops early when n is not a multiple of WIDTH
    // (n == 5 gives [x[0], x[1], x[2], x[3], x[0] * x[4]]).
    let mut out: Vec<F> = Vec::with_capacity(n);
    // `lanes` mirrors the most recent product of each chain. It duplicates data already in
    // `out`, but a small local array is what lets the compiler keep the values in registers.
    let mut lanes: [F; WIDTH] = x[..WIDTH].try_into().unwrap();
    out.extend(lanes);
    for (offset, &elem) in x[WIDTH..].iter().enumerate() {
        let lane = offset % WIDTH;
        lanes[lane] *= elem;
        out.push(lanes[lane]);
    }
    debug_assert_eq!(out.len(), n);

    // The four chains meet here: invert all four final products with a single inversion.
    let mut lane_inv = {
        let c01 = lanes[0] * lanes[1];
        let c23 = lanes[2] * lanes[3];
        let c0123 = c01 * c23;
        let c0123_inv = c0123.inverse();
        let c01_inv = c0123_inv * c23;
        let c23_inv = c0123_inv * c01;
        [c01_inv * lanes[1], c01_inv * lanes[0], c23_inv * lanes[3], c23_inv * lanes[2]]
    };

    // Backward pass. When index `i` is visited, `out[i - WIDTH]` still holds the chain product
    // up to (and including) x[i - WIDTH], and `lane_inv[i % WIDTH]` is the inverse of the chain
    // product up to x[i]; their product is the inverse of x[i].
    for i in (WIDTH..n).rev() {
        let lane = i % WIDTH;
        out[i] = out[i - WIDTH] * lane_inv[lane];
        // Strip x[i] from the running inverse so it matches the next (earlier) chain entry.
        lane_inv[lane] *= x[i];
    }
    // What is left in `lane_inv` are the inverses of the first WIDTH inputs.
    out[..WIDTH].copy_from_slice(&lane_inv);

    // Sanity check only.
    for (&inv, &orig) in out.iter().zip(x) {
        debug_assert_eq!(inv * orig, F::one());
    }

    out
}
122 changes: 122 additions & 0 deletions field/src/exponentiation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use crate::AbstractField;

/// Computes `val^power` by binary (square-and-multiply) exponentiation.
///
/// The bits of `power` are scanned from least to most significant. `base` holds
/// `val^(2^bit)` at the start of each step and is multiplied into the accumulator whenever the
/// corresponding bit is set. A `power` of zero returns `AF::one()`.
pub fn exp_u64_by_squaring<AF: AbstractField>(val: AF, power: u64) -> AF {
    let mut base = val;
    let mut acc = AF::one();

    for bit in 0..bits_u64(power) {
        let bit_is_set = (power >> bit) & 1 == 1;
        if bit_is_set {
            acc *= base.clone();
        }
        base = base.square();
    }
    acc
}

/// Number of bits needed to write `n` in binary, i.e. the position of its highest set bit
/// plus one; `0` for `n == 0`.
const fn bits_u64(n: u64) -> usize {
    let significant = u64::BITS - n.leading_zeros();
    significant as usize
}

/// Raises `val` to the power `1717986917` with a fixed addition chain.
///
/// Since `5 * 1717986917 = 4 * (2^31 - 2) + 1`, the exponent is congruent to the inverse of 5
/// modulo `p - 1 = 2^31 - 2`. As `a^(p - 1) = 1` for all `a` in `F_p`, this gives
/// `(a^1717986917)^5 = a`.
///
/// The exponent is `0x66666665` (binary `1100110011001100110011001100101`). Each local below
/// is named after the exponent it holds, in hexadecimal. The chain uses 30 squarings and
/// 7 multiplications; a shorter one may exist (for comparison, `1717986918` needs only
/// 4 multiplications).
pub fn exp_1717986917<AF: AbstractField>(val: AF) -> AF {
    let pow_0x1 = val;
    let pow_0x2 = pow_0x1.square();
    let pow_0x3 = pow_0x2.clone() * pow_0x1;
    let pow_0x5 = pow_0x2 * pow_0x3.clone();
    // Grow the repeating `33…3` pattern by shifting a block and adding an earlier one back in.
    let pow_0x30 = pow_0x3.exp_power_of_2(4);
    let pow_0x33 = pow_0x30 * pow_0x3.clone();
    let pow_0x3300 = pow_0x33.exp_power_of_2(8);
    let pow_0x3333 = pow_0x3300.clone() * pow_0x33;
    let pow_0x33_0000 = pow_0x3300.exp_power_of_2(8);
    let pow_0x33_3333 = pow_0x33_0000 * pow_0x3333;
    let pow_0x333_3330 = pow_0x33_3333.exp_power_of_2(4);
    let pow_0x333_3333 = pow_0x333_3330 * pow_0x3;
    // Final shift by five bits, then fill in the low bits `00101`.
    let pow_0x6666_6660 = pow_0x333_3333.exp_power_of_2(5);
    pow_0x6666_6660 * pow_0x5
}

/// Raises `val` to the power `1420470955` with a fixed addition chain.
///
/// Since `3 * 1420470955 = 2 * (2^31 - 2^24) + 1`, the exponent is congruent to the inverse of
/// 3 modulo `p - 1 = 2^31 - 2^24`. As `a^(p - 1) = 1` for all `a` in `F_p`, this gives
/// `(a^1420470955)^3 = a`.
///
/// The exponent is `0x54AAAAAB` (binary `1010100101010101010101010101011`). Each local below
/// is named after the exponent it holds, in hexadecimal. The chain uses 29 squarings and
/// 7 multiplications; a shorter one may exist.
pub fn exp_1420470955<AF: AbstractField>(val: AF) -> AF {
    let pow_0x1 = val;
    let pow_0x4 = pow_0x1.exp_power_of_2(2);
    let pow_0x5 = pow_0x4.clone() * pow_0x1.clone();
    let pow_0x10 = pow_0x4.exp_power_of_2(2);
    let pow_0x15 = pow_0x10 * pow_0x5;
    // `exp_power_of_2` only borrows its receiver, so no clone is needed before calling it.
    let pow_0x540 = pow_0x15.exp_power_of_2(6);
    let pow_0x555 = pow_0x540.clone() * pow_0x15.clone();
    let pow_0xa95 = pow_0x540 * pow_0x555.clone();
    let pow_0xa9_5000 = pow_0xa95.exp_power_of_2(12);
    let pow_0xa9_5555 = pow_0xa9_5000 * pow_0x555;
    let pow_0x2a55_5540 = pow_0xa9_5555.exp_power_of_2(6);
    let pow_0x2a55_5555 = pow_0x2a55_5540 * pow_0x15;
    let pow_0x54aa_aaaa = pow_0x2a55_5555.square();
    // Last use of the input, so it is moved rather than cloned.
    pow_0x54aa_aaaa * pow_0x1
}

/// Raises `val` to the power `1725656503` with a fixed addition chain.
///
/// Since `7 * 1725656503 = 6 * (2^31 - 2^27) + 1`, the exponent is congruent to the inverse of
/// 7 modulo `p - 1 = 2^31 - 2^27`. As `a^(p - 1) = 1` for all `a` in `F_p`, this gives
/// `(a^1725656503)^7 = a`.
///
/// The exponent is `0x66DB6DB7` (binary `1100110110110110110110110110111`). Each local below
/// is named after the exponent it holds, in hexadecimal. The chain uses 29 squarings and
/// 8 multiplications; a shorter one may exist.
pub fn exp_1725656503<AF: AbstractField>(val: AF) -> AF {
    let pow_0x1 = val;
    let pow_0x2 = pow_0x1.square();
    let pow_0x3 = pow_0x2 * pow_0x1.clone();
    let pow_0x6 = pow_0x3.square();
    let pow_0x7 = pow_0x6.clone() * pow_0x1;
    let pow_0x18 = pow_0x6.exp_power_of_2(2);
    let pow_0x1b = pow_0x18.clone() * pow_0x3;
    let pow_0xc0 = pow_0x18.exp_power_of_2(3);
    // 0xdb is the nine-bit block `011011011` that repeats through the exponent.
    let pow_0xdb = pow_0xc0.clone() * pow_0x1b;
    let pow_0x19b = pow_0xdb.clone() * pow_0xc0;
    let pow_0x3_3600 = pow_0x19b.exp_power_of_2(9);
    let pow_0x3_36db = pow_0x3_3600 * pow_0xdb.clone();
    let pow_0x66d_b600 = pow_0x3_36db.exp_power_of_2(9);
    let pow_0x66d_b6db = pow_0x66d_b600 * pow_0xdb;
    // Final shift by four bits, then fill in the low bits `0111`.
    let pow_0x66db_6db0 = pow_0x66d_b6db.exp_power_of_2(4);
    pow_0x66db_6db0 * pow_0x7
}

/// Raises `val` to the power `10540996611094048183` with a fixed addition chain.
///
/// Since `7 * 10540996611094048183 = 4 * (2^64 - 2^32) + 1`, the exponent is congruent to the
/// inverse of 7 modulo `p - 1 = 2^64 - 2^32`. As `a^(p - 1) = 1` for all `a` in `F_p`, this
/// gives `(a^10540996611094048183)^7 = a`.
///
/// The exponent is `0x92492491_B6DB6DB7`, binary
/// `1001001001001001001001001001000110110110110110110110110110110111`.
/// It factors as `(2^32 + 3) * 0x92492490 + 7`, so the chain first builds
/// `seed = val^(2^32 + 3)` and then raises `seed` to `0x9249249` by shift-and-add. The locals
/// named `seed_0x…` hold `seed` raised to that hexadecimal power.
///
/// The chain uses 63 squarings and 8 multiplications; a slightly shorter one may exist.
pub fn exp_10540996611094048183<AF: AbstractField>(val: AF) -> AF {
    let x1 = val;
    let x2 = x1.square();
    // Last use of the input, so it is moved rather than cloned.
    let x3 = x2.clone() * x1;
    let x4 = x2.square();
    let x7 = x4.clone() * x3.clone();

    // seed = val^(2^32) * val^3 = val^(2^32 + 3)
    let x_2pow32 = x4.exp_power_of_2(30);
    let seed = x_2pow32 * x3;

    // Raise `seed` to 0x9249249 = 0b1001001001001001001001001001.
    let seed_0x8 = seed.exp_power_of_2(3);
    let seed_0x9 = seed_0x8 * seed;
    let seed_0x240 = seed_0x9.exp_power_of_2(6);
    let seed_0x249 = seed_0x240 * seed_0x9.clone();
    let seed_0x24_9000 = seed_0x249.exp_power_of_2(12);
    let seed_0x24_9249 = seed_0x24_9000 * seed_0x249;
    let seed_0x924_9240 = seed_0x24_9249.exp_power_of_2(6);
    let seed_0x924_9249 = seed_0x924_9240 * seed_0x9;

    // Shift by four more bits (seed^0x92492490) and fill in the low bits `0111`.
    let seed_0x9249_2490 = seed_0x924_9249.exp_power_of_2(4);
    seed_0x9249_2490 * x7
}
Loading