Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 104 additions & 2 deletions benches/experimental_benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ use efficient_sumcheck::{
m31::{
evaluate_bf::evaluate_bf, evaluate_ef::evaluate_ef, reduce_bf::reduce_bf,
reduce_ef::reduce_ef, sumcheck,
},
}
},
multilinear::{pairwise, ReduceMode, TimeProver},
prover::Prover,
tests::{BenchStream, Fp4SmallM31, SmallM31, F128},
tests::{BenchStream, Fp4SmallM31, SmallM31, F128, SmallGoldilocks, Fp2SmallGoldilocks},
Sumcheck,
};

Expand Down Expand Up @@ -181,6 +181,107 @@ fn bench_reduce_ef(c: &mut Criterion) {
});
}


fn bench_reduce_ef_goldilocks(c: &mut Criterion) {

use efficient_sumcheck::experimental::goldilocks::reduce_ef::reduce_ef;
const LEN_XSMALL: usize = 1 << 10; // 1K
const LEN_SMALL: usize = 1 << 14; // 16K
const LEN_MED: usize = 1 << 16; // 64K
const LEN_LARGE: usize = 1 << 18; // 256K
const LEN_XLARGE: usize = 1 << 20; // 1M

let mut rng = test_rng();

// Shared input vector in the base field
let src_xsmall: Vec<Fp2SmallGoldilocks> = (0..LEN_XSMALL)
.map(|_| Fp2SmallGoldilocks::rand(&mut rng))
.collect();
let src_small: Vec<Fp2SmallGoldilocks> = (0..LEN_SMALL)
.map(|_| Fp2SmallGoldilocks::rand(&mut rng))
.collect();
let src_med: Vec<Fp2SmallGoldilocks> = (0..LEN_MED).map(|_| Fp2SmallGoldilocks::rand(&mut rng)).collect();
let src_large: Vec<Fp2SmallGoldilocks> = (0..LEN_LARGE)
.map(|_| Fp2SmallGoldilocks::rand(&mut rng))
.collect();
let src_xlarge: Vec<Fp2SmallGoldilocks> = (0..LEN_XLARGE)
.map(|_| Fp2SmallGoldilocks::rand(&mut rng))
.collect();

let challenge_ef = Fp2SmallGoldilocks::from(7);

// This should be faster
c.bench_function("reduce_ef::goldilocks::reduce_1K", |b| {
b.iter(|| {
let mut v = src_xsmall.clone();
reduce_ef(black_box(&mut v), challenge_ef);
});
});

c.bench_function("reduce_ef::goldilocks::reduce_16K", |b| {
b.iter(|| {
let mut v = src_small.clone();
reduce_ef(black_box(&mut v), challenge_ef);
});
});

c.bench_function("reduce_ef::goldilocks::reduce_64K", |b| {
b.iter(|| {
let mut v = src_med.clone();
reduce_ef(black_box(&mut v), challenge_ef);
});
});

c.bench_function("reduce_ef::goldilocks::reduce_256K", |b| {
b.iter(|| {
let mut v = src_large.clone();
reduce_ef(black_box(&mut v), challenge_ef);
});
});

c.bench_function("reduce_ef::goldilocks::reduce_1M", |b| {
b.iter(|| {
let mut v = src_xlarge.clone();
reduce_ef(black_box(&mut v), challenge_ef);
});
});

c.bench_function("ef_pairwise::reduce_1K", |b| {
b.iter(|| {
let mut v = src_xsmall.clone();
pairwise::reduce_evaluations(black_box(&mut v), challenge_ef);
});
});

c.bench_function("ef_pairwise::reduce_16K", |b| {
b.iter(|| {
let mut v = src_small.clone();
pairwise::reduce_evaluations(black_box(&mut v), challenge_ef);
});
});

c.bench_function("ef_pairwise::reduce_64K", |b| {
b.iter(|| {
let mut v = src_med.clone();
pairwise::reduce_evaluations(black_box(&mut v), challenge_ef);
});
});

c.bench_function("ef_pairwise::reduce_256K", |b| {
b.iter(|| {
let mut v = src_large.clone();
pairwise::reduce_evaluations(black_box(&mut v), challenge_ef);
});
});

c.bench_function("ef_pairwise::reduce_1M", |b| {
b.iter(|| {
let mut v = src_xlarge.clone();
pairwise::reduce_evaluations(black_box(&mut v), challenge_ef);
});
});
}

fn bench_reduce_bf(c: &mut Criterion) {
const LEN_XSMALL: usize = 1 << 10; // 1K
const LEN_SMALL: usize = 1 << 14; // 16K
Expand Down Expand Up @@ -476,5 +577,6 @@ criterion_group!(
bench_evaluate_bf,
bench_evaluate_ef,
bench_reduce_ef,
bench_reduce_ef_goldilocks
);
criterion_main!(benches);
57 changes: 57 additions & 0 deletions src/experimental/goldilocks/arithmetic/add.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use ark_std::simd::{cmp::SimdPartialOrd, LaneCount, Simd, SupportedLaneCount};
use super::super::{MODULUS, EPSILON};
use crate::experimental::goldilocks::utils::{assume, branch_hint};

// https://github.com/zhenfeizhang/Goldilocks/blob/872114997b82d0157e29a702992a3bd2023aa7ba/src/primefield/fp.rs#L377
/// Adds two Goldilocks field elements given as raw `u64` values.
///
/// A carry out of the 64-bit addition stands for `2^64`, which is congruent to
/// `EPSILON` modulo `MODULUS`, so each carry is folded back in by adding
/// `EPSILON`. No final reduction is performed: the returned value is congruent
/// to `a + b` modulo `MODULUS` but may itself be `>= MODULUS`.
#[inline(always)]
pub fn add(a: u64, b: u64) -> u64 {
// First carry: plain wrapping addition of the two operands.
let (sum, over) = a.overflowing_add(b);
// Fold the first carry back in (adds 0 when there was none); this may carry again.
let (mut sum, over) = sum.overflowing_add((over as u64) * EPSILON);
if over {
// NB: a > MODULUS && b > MODULUS is necessary but not sufficient for double-overflow.
// This assume does two things:
// 1. If compiler knows that either a or b <= MODULUS, then it can skip this check.
// 2. Hints to the compiler how rare this double-overflow is (thus handled better with a branch).
// NOTE(review): `assume` and `branch_hint` live in `utils` and are not visible here;
// presumably they are optimizer hints only -- confirm.
assume(a > MODULUS && b > MODULUS);
branch_hint();
sum += EPSILON; // Cannot overflow.
}
sum
}

/// Lane-wise Goldilocks addition of two SIMD vectors of raw `u64` values.
///
/// For operands that are already reduced (`< MODULUS`) every lane of the
/// result is reduced as well. Lanes holding larger values are not fully
/// supported: folding the carry back in can itself wrap, and that second
/// wrap is not detected here.
#[inline(always)]
pub fn add_v<const LANES: usize>(a: &Simd<u64, LANES>, b: &Simd<u64, LANES>) -> Simd<u64, LANES>
where
    LaneCount<LANES>: SupportedLaneCount,
{
    let p = Simd::<u64, LANES>::splat(MODULUS);
    let two_pow_64_mod_p = Simd::<u64, LANES>::splat(EPSILON);

    // Wrapping lane-wise sum; a lane wrapped iff its sum is below an operand.
    let wrapped = *a + *b;
    let carried = wrapped.simd_lt(*a);

    // A wrapped lane lost 2^64, which is EPSILON modulo p: add it back.
    let folded = carried.select(wrapped + two_pow_64_mod_p, wrapped);

    // Bring every lane into the canonical range [0, p).
    folded.simd_ge(p).select(folded - p, folded)
}


#[cfg(test)]
mod tests {
    use super::add_v;
    use ark_std::simd::Simd;

    /// Smoke test: a small sum that needs no modular correction.
    #[test]
    fn sanity() {
        let lhs = Simd::from_array([9u64]);
        let rhs = Simd::from_array([7u64]);
        let total = add_v(&lhs, &rhs);
        assert_eq!(total[0], 16);
    }
}
3 changes: 3 additions & 0 deletions src/experimental/goldilocks/arithmetic/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod add;
pub mod mul;
pub mod sub;
142 changes: 142 additions & 0 deletions src/experimental/goldilocks/arithmetic/mul.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
use std::simd::{Mask};

use ark_std::{
mem,
simd::{cmp::SimdPartialOrd, LaneCount, Simd, SupportedLaneCount},
};

use crate::tests::SmallGoldilocks;
use super::super::{MODULUS, EPSILON};

/// Multiplies two raw `u64` values by reinterpreting them as `SmallGoldilocks`
/// field elements, multiplying with that type's `*` operator, and
/// reinterpreting the product back as a `u64`.
///
/// NOTE(review): this relies on `SmallGoldilocks` having exactly the size and
/// bit layout of a `u64`, and on every `u64` bit pattern being a value its
/// multiplication accepts -- neither is visible from this file; confirm against
/// the type's definition. Also confirm whether its internal representation is
/// the plain integer value (as `mul_v` assumes) or a transformed one.
pub fn mul(a: u64, b: u64) -> u64 {
// SAFETY (unverified, see note above): assumes `SmallGoldilocks` is layout-compatible with `u64`.
let prod = unsafe { mem::transmute::<u64, SmallGoldilocks>(a) }
* unsafe { mem::transmute::<u64, SmallGoldilocks>(b) };
// SAFETY (unverified, see note above): same layout assumption, in the other direction.
unsafe { mem::transmute::<SmallGoldilocks, u64>(prod) }
}


/// Lane-wise Goldilocks multiplication of two SIMD vectors of raw `u64` values.
///
/// Each lane computes the full 128-bit product from 32-bit halves, then reduces
/// it modulo `MODULUS` using `2^64 = EPSILON` and `2^96 = -1` (both mod `MODULUS`).
/// The final conditional subtraction leaves every lane of the result `< MODULUS`.
#[inline(always)]
pub fn mul_v<const LANES: usize>(
a: &Simd<u64, LANES>,
b: &Simd<u64, LANES>,
) -> Simd<u64, LANES>
where
LaneCount<LANES>: SupportedLaneCount,
{
let mask32 = Simd::splat(0xFFFFFFFFu64);

// Split both operands into 32-bit halves.
let a_lo = *a & mask32;
let a_hi = *a >> 32;
let b_lo = *b & mask32;
let b_hi = *b >> 32;

// Four partial products; each fits in 64 bits because every factor is < 2^32.
let lo_lo = a_lo * b_lo;
let lo_hi = a_lo * b_hi;
let hi_lo = a_hi * b_lo;
let hi_hi = a_hi * b_hi;

// The two middle terms carry weight 2^32. Their sum can wrap; a wrap is worth
// 2^64 * 2^32 = 2^96 overall, i.e. 2^32 inside the high 64-bit word.
let mid = lo_hi + hi_lo;
let mid_carry = mid.simd_lt(lo_hi).select(Simd::splat(1 << 32), Simd::splat(0));

let mid_lo = mid & mask32;
let mid_hi = mid >> 32;

// Assemble the 128-bit product as (x_hi, x_lo), propagating the carry out of the low word.
let x_lo = lo_lo + (mid_lo << 32);
let x_lo_carry = x_lo.simd_lt(lo_lo).select(Simd::splat(1), Simd::splat(0));
let x_hi = hi_hi + mid_hi + mid_carry + x_lo_carry;

// product = x_lo + 2^64 * x_hi_lo + 2^96 * x_hi_hi
//         = x_lo + EPSILON * x_hi_lo - x_hi_hi   (mod MODULUS)
let x_hi_hi = x_hi >> 32;
let x_hi_lo = x_hi & mask32;


// t0 = x_lo - x_hi_hi. On a borrow the wrapped value is 2^64 too large, so
// subtract EPSILON (= 2^64 mod MODULUS); that second subtraction cannot borrow.
let mut t0 = x_lo - x_hi_hi;
let borrow_mask = x_lo.simd_lt(x_hi_hi);
t0 = borrow_mask.select(t0 - Simd::splat(EPSILON), t0);


// t1 = x_hi_lo * EPSILON fits in 64 bits since both factors are < 2^32.
let t1 = x_hi_lo * Simd::splat(EPSILON);

// t0 + t1, folding a carry back in as EPSILON; that fold cannot wrap again.
let (t2_wrapped, carry) = overflowing_add_simd(t0, t1);
let mut r = t2_wrapped + (carry.select(Simd::splat(EPSILON), Simd::splat(0)));

// Final canonical reduction: one conditional subtraction suffices because r < 2^64 < 2 * MODULUS.
let p = Simd::splat(MODULUS);
r = r.simd_ge(p).select(r - p, r);

r
}

/// Lane-wise wrapping addition that also reports which lanes wrapped.
///
/// Returns the wrapped sums together with a mask that is set for every lane
/// whose true sum did not fit in 64 bits.
#[inline(always)]
fn overflowing_add_simd<const LANES: usize>(
    a: Simd<u64, LANES>,
    b: Simd<u64, LANES>,
) -> (Simd<u64, LANES>, Mask<i64, LANES>)
where
    LaneCount<LANES>: SupportedLaneCount,
{
    let wrapped = a + b;
    // An unsigned sum wrapped exactly when it ends up below one of its operands.
    let carried = wrapped.simd_lt(a);
    (wrapped, carried)
}


#[cfg(test)]
mod tests {
    use crate::experimental::goldilocks::MODULUS;

    use super::mul_v;
    use ark_std::{rand::RngCore, simd::Simd, test_rng};

    /// Reference product computed with 128-bit integer arithmetic.
    fn reference_mul(a: u64, b: u64) -> u64 {
        ((a as u128 * b as u128) % MODULUS as u128) as u64
    }

    /// Checks one fixed pair of operands against the 128-bit reference.
    #[test]
    fn single() {
        // https://asecuritysite.com/zk/go_plonk4

        let lhs: u64 = 10719222850664546238;
        let rhs: u64 = 301075827032876239;

        let expected = reference_mul(lhs, rhs);

        // Run the SIMD implementation with a single lane.
        const LANES: usize = 1;
        let product = mul_v(
            &Simd::<u64, LANES>::from_array([lhs]),
            &Simd::<u64, LANES>::from_array([rhs]),
        );
        let received = product.as_array()[0];

        println!("Expected: {}, Received: {}", expected, received);
        assert_eq!(expected, received);
    }

    /// Checks 2^20 random products, 16 lanes at a time, against the reference.
    #[test]
    fn sanity() {
        const LEN: usize = 1 << 20;
        const LANES: usize = 16;
        let mut rng = test_rng();

        // Random operands: reduced multipliers first, then arbitrary 64-bit multiplicands.
        let multipliers: Vec<u64> = (0..LEN).map(|_| rng.next_u64() % MODULUS).collect();
        let multiplicands: Vec<u64> = (0..LEN).map(|_| rng.next_u64()).collect();

        // Control values.
        let expected_ef: Vec<u64> = multiplicands
            .iter()
            .zip(&multipliers)
            .map(|(&a, &b)| reference_mul(a, b))
            .collect();

        // Multiply in place, one SIMD vector per chunk.
        let mut received_ef = multiplicands;
        for (lhs, rhs) in received_ef
            .chunks_mut(LANES)
            .zip(multipliers.chunks(LANES))
        {
            let product = mul_v(
                &Simd::<u64, LANES>::from_slice(lhs),
                &Simd::<u64, LANES>::from_slice(rhs),
            );
            lhs.copy_from_slice(product.as_array());
        }

        assert_eq!(expected_ef, received_ef);
    }
}
47 changes: 47 additions & 0 deletions src/experimental/goldilocks/arithmetic/sub.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use ark_std::simd::{cmp::SimdPartialOrd, LaneCount, Simd, SupportedLaneCount};

// https://github.com/zhenfeizhang/Goldilocks/blob/872114997b82d0157e29a702992a3bd2023aa7ba/src/primefield/fp.rs#L424
/// Subtracts `b` from `a` in the Goldilocks field (`p = 2^64 - 2^32 + 1`),
/// operating on raw `u64` values.
///
/// The result is congruent to `a - b` modulo `p` for every pair of `u64`
/// inputs, including non-canonical ones (`>= p`). When both inputs are
/// canonical (`< p`) the result is canonical as well.
///
/// A borrow out of a 64-bit subtraction means the wrapped value is `2^64` too
/// large; since `2^64` is congruent to `2^32 - 1` modulo `p`, each borrow is
/// compensated by subtracting `2^32 - 1`.
#[inline(always)]
pub fn sub(a: u64, b: u64) -> u64 {
    /// `2^64 mod p`, i.e. `2^32 - 1`.
    const EPSILON: u64 = 0xFFFF_FFFF;

    let (diff, borrow) = a.overflowing_sub(b);
    // Compensate for the first borrow (subtracts 0 when there was none).
    let (diff, borrow) = diff.overflowing_sub(u64::from(borrow) * EPSILON);
    if borrow {
        // The compensation itself borrowed, which only happens when `b > p`.
        // Here `diff >= 2^64 - EPSILON`, so this last subtraction cannot underflow.
        diff - EPSILON
    } else {
        diff
    }
}

/// Lane-wise Goldilocks subtraction `a - b` on SIMD vectors of raw `u64` values.
///
/// For operands that are already reduced (below `2^64 - 2^32 + 1`) every lane
/// of the result is reduced as well. Only a single borrow per lane is
/// corrected, so larger operand values are not fully supported.
#[inline(always)]
pub fn sub_v<const LANES: usize>(a: &Simd<u64, LANES>, b: &Simd<u64, LANES>) -> Simd<u64, LANES>
where
    LaneCount<LANES>: SupportedLaneCount,
{
    // Wrapping lane-wise difference.
    let wrapped = *a - *b;

    // Where a < b the wrapped difference is (a - b) + 2^64, but the field result
    // is (a - b) + (2^64 - 2^32 + 1); the two differ by exactly 2^32 - 1.
    let corrected = wrapped - Simd::<u64, LANES>::splat(0xFFFFFFFF);

    a.simd_lt(*b).select(corrected, wrapped)
}

#[cfg(test)]
mod tests {
    use super::sub_v;
    use ark_std::simd::Simd;

    /// Smoke test: a difference that needs no modular correction.
    #[test]
    fn sanity() {
        let minuend = Simd::from_array([9u64]);
        let subtrahend = Simd::from_array([7u64]);
        let difference = sub_v(&minuend, &subtrahend);
        assert_eq!(difference[0], 2);
    }
}
5 changes: 5 additions & 0 deletions src/experimental/goldilocks/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// The Goldilocks field modulus `p = 2^64 - 2^32 + 1`.
pub const MODULUS: u64 = 0xffffffff00000001;

/// `2^32 - 1`, which equals `2^64 - MODULUS`, i.e. the value of `2^64` modulo `p`.
///
/// Adding it compensates for a carry out of a 64-bit addition; subtracting it
/// compensates for a borrow out of a 64-bit subtraction.
pub const EPSILON: u64 = 0xffffffff;
Loading