diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml index 68fae4fb0..81c12d15c 100644 --- a/.github/workflows/pr_main.yaml +++ b/.github/workflows/pr_main.yaml @@ -213,7 +213,7 @@ jobs: - name: Build and archive prover + crypto tests run: | cargo nextest archive --release \ - -p lambda-vm-prover -p stark -p crypto \ + -p lambda-vm-prover -p stark -p crypto -p ecsm \ --archive-file prover-tests.tar.zst - name: Upload test archive diff --git a/Cargo.lock b/Cargo.lock index 56f65fcf5..33fd1fb71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -977,6 +977,15 @@ dependencies = [ "spki", ] +[[package]] +name = "ecsm" +version = "0.1.0" +dependencies = [ + "k256", + "num-bigint 0.4.6", + "num-traits", +] + [[package]] name = "educe" version = "0.6.0" @@ -1327,6 +1336,7 @@ dependencies = [ name = "executor" version = "0.1.0" dependencies = [ + "ecsm", "guest_program", "rkyv", "rustc-demangle", @@ -1982,6 +1992,7 @@ dependencies = [ "bincode", "criterion 0.5.1", "crypto", + "ecsm", "env_logger", "executor", "log", diff --git a/Cargo.toml b/Cargo.toml index 2ba670c40..d64852eb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "crypto/crypto", "crypto/math", "crypto/math-cuda", + "crypto/ecsm", "bin/cli", ] diff --git a/crypto/ecsm/Cargo.toml b/crypto/ecsm/Cargo.toml new file mode 100644 index 000000000..52eb0962e --- /dev/null +++ b/crypto/ecsm/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "ecsm" +description = "secp256k1 scalar multiplication reference + ECSM accelerator witness generation" +version = "0.1.0" +edition = "2024" +license.workspace = true + +[dependencies] +num-bigint = "0.4.6" +num-traits = "0.2.19" +# Audited secp256k1 arithmetic (host-side witness generation only; never in the +# constraint system). Used for the projective double-and-add replay + batch +# inversion that builds the ECDAS step witnesses efficiently. 
+k256 = { version = "0.13", default-features = false, features = ["arithmetic", "expose-field"] }
diff --git a/crypto/ecsm/src/curve.rs b/crypto/ecsm/src/curve.rs
new file mode 100644
index 000000000..bad7238f1
--- /dev/null
+++ b/crypto/ecsm/src/curve.rs
@@ -0,0 +1,418 @@
+//! secp256k1 curve arithmetic in affine coordinates and the chip-faithful
+//! double-and-add replay.
+//!
+//! The curve is `y^2 = x^3 + 7 mod p` (short Weierstrass with `a = 0`). The point at
+//! infinity never appears: the ECSM/ECDAS design guarantees it cannot occur for
+//! `k in [1, N)` (see `ecsm.typ` "Point at infinity" / ECDAS soundness argument), so the
+//! affine formulas below are always well defined.
+
+use num_bigint::BigUint;
+
+#[cfg(test)]
+use crate::field::Fp;
+
+/// An affine curve point. Never the point at infinity.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct AffinePoint {
+    pub x: BigUint,
+    pub y: BigUint,
+}
+
+/// Recovers the canonical (even) `y` for a given `x` such that `y^2 = x^3 + b mod p`.
+///
+/// Both `y` and `p - y` are valid; we pick the even one so the executor and prover agree
+/// deterministically. The chip never constrains the parity (it only writes back `xR`, and
+/// `k·P` and `k·(-P)` share an x-coordinate), so any consistent choice is sound.
+///
+/// Returns `None` when `x` is not a valid curve x-coordinate (`x^3 + b` is not a quadratic
+/// residue, or `x` is not a canonical field element — including any `x` wider than 256
+/// bits, which cannot even be encoded).
+pub fn recover_y_canonical(x: &BigUint) -> Option<BigUint> {
+    // `be32` requires a value that fits in 32 bytes and panics otherwise. This function
+    // is public, so honour the documented `None` contract for oversized input here.
+    if x.bits() > 256 {
+        return None;
+    }
+    // SEC1 compressed encoding: the `0x02` prefix selects the even-`y` root, delegated to k256.
+    let mut enc = [0u8; 33];
+    enc[0] = 0x02;
+    enc[1..33].copy_from_slice(&be32(x));
+    let ep = EncodedPoint::from_bytes(enc).ok()?;
+    let affine: K256Affine = Option::from(K256Affine::from_encoded_point(&ep))?;
+    Some(from_k256_affine(&affine).y)
+}
+
+/// `2·a` on the curve. Requires `a.y != 0` (always true on secp256k1).
+#[cfg(test)]
+pub fn point_double(a: &AffinePoint) -> AffinePoint {
+    let x = Fp::new(a.x.clone());
+    let y = Fp::new(a.y.clone());
+    // λ = 3x² / 2y
+    let three_x2 = x.mul(&x).mul(&Fp::from_u64(3));
+    let two_y = y.add(&y);
+    // `Fp::inv` maps zero to zero, so a violated `a.y != 0` precondition would give a
+    // wrong point here rather than a panic.
+    let lambda = three_x2.mul(&two_y.inv());
+    // xr = λ² - 2x
+    let xr = lambda.mul(&lambda).sub(&x).sub(&x);
+    // yr = λ(x - xr) - y
+    let yr = lambda.mul(&x.sub(&xr)).sub(&y);
+    AffinePoint { x: xr.0, y: yr.0 }
+}
+
+/// `a + g` on the curve. Requires `a.x != g.x` (always true in the chip's add steps).
+#[cfg(test)]
+pub fn point_add(a: &AffinePoint, g: &AffinePoint) -> AffinePoint {
+    let xa = Fp::new(a.x.clone());
+    let ya = Fp::new(a.y.clone());
+    let xg = Fp::new(g.x.clone());
+    let yg = Fp::new(g.y.clone());
+    // λ = (yg - ya) / (xg - xa)
+    // (`Fp::inv` maps zero to zero, so `a.x == g.x` would silently give λ = 0.)
+    let lambda = yg.sub(&ya).mul(&xg.sub(&xa).inv());
+    // xr = λ² - xa - xg
+    let xr = lambda.mul(&lambda).sub(&xa).sub(&xg);
+    // yr = λ(xa - xr) - ya
+    let yr = lambda.mul(&xa.sub(&xr)).sub(&ya);
+    AffinePoint { x: xr.0, y: yr.0 }
+}
+
+/// One step of the double-and-add replay, at point level.
+///
+/// Mirrors a single ECDAS row: receive accumulator `a` (and base `g`), perform `op`
+/// (0 = double, 1 = add), and decide `next_op` (whether the next row is an add).
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct StepPts {
+    /// Accumulator received by this step.
+    pub a: AffinePoint,
+    /// Base point of the scalar multiplication (the same for every step).
+    pub g: AffinePoint,
+    /// Scalar bit index this step belongs to.
+    pub round: u8,
+    /// Operation performed by this step: 0 = double, 1 = add.
+    pub op: u8,
+    /// Operation of the following step: 1 means the next step adds at the same round.
+    pub next_op: u8,
+    /// Result of applying `op` to `a` (and `g` for an add).
+    pub r: AffinePoint,
+    /// Slope of this step: add => (yG-yA)/(xG-xA), double => 3xA^2/(2yA).
+    /// Precomputed here (batched) so the witness builder never inverts per step.
+    pub lambda: BigUint,
+}
+
+/// Reference slope `lambda` for one step, computed in `BigUint` `F_p`.
+/// Used by the reference replay and the k256 parity test.
+#[cfg(test)]
+pub fn step_lambda(a: &AffinePoint, g: &AffinePoint, op: u8) -> BigUint {
+    let xa = Fp::new(a.x.clone());
+    let ya = Fp::new(a.y.clone());
+    if op == 1 {
+        // add: λ = (yG - yA) / (xG - xA)
+        let xg = Fp::new(g.x.clone());
+        let yg = Fp::new(g.y.clone());
+        yg.sub(&ya).mul(&xg.sub(&xa).inv()).0
+    } else {
+        // double: λ = 3xA² / (2yA)
+        let three_x2 = xa.mul(&xa).mul(&Fp::from_u64(3));
+        let two_y = ya.add(&ya);
+        three_x2.mul(&two_y.inv()).0
+    }
+}
+
+/// Bit length minus one = position of the most significant set bit (`len_k`).
+/// Requires `k >= 1`.
+///
+/// # Panics
+///
+/// Panics if `k == 0`, in every build profile.
+pub fn msb_position(k: &BigUint) -> u32 {
+    // Checked in release builds too: with only a `debug_assert!`, `k == 0` would wrap the
+    // subtraction below to `u32::MAX` and drive `schedule` into a multi-billion-iteration
+    // loop. `bits() == 0` holds exactly for zero, so no temporary `BigUint` is needed.
+    assert!(k.bits() > 0, "ECSM: msb_position requires k >= 1");
+    (k.bits() as u32) - 1
+}
+
+/// Replays the ECDAS double-and-add sequence for `k·g`, returning every step and the
+/// final point. This is the single source of truth for both the executor (which needs
+/// only `final.x`) and the prover (which needs the full step list to build witnesses).
+///
+/// The schedule matches the spec exactly: start with `A = g`, `round = len_k - 1`,
+/// `op = double`; a double at `round` sets `next_op` to the scalar bit at `round`
+/// (1 ⇒ the next row adds at the same round); an add forces `next_op = 0` and advances
+/// the round. The MSB itself is represented by the initial `A = g` (consumed by ECSM via
+/// the `BIT[len_k]` interaction), so it is never processed as an add here.
+#[cfg(test)]
+pub fn replay_double_and_add_reference(
+    k: &BigUint,
+    g: &AffinePoint,
+) -> (Vec<StepPts>, AffinePoint) {
+    let m = msb_position(k) as i64; // len_k
+    let mut a = g.clone();
+    let mut round: i64 = m - 1;
+    let mut op: u8 = 0; // double
+    let mut steps = Vec::new();
+
+    // `round` is signed so that `k == 1` (m == 0) starts at -1 and skips the loop.
+    while round >= 0 {
+        let (r, next_op) = if op == 0 {
+            let r = point_double(&a);
+            let bit = if k.bit(round as u64) { 1u8 } else { 0u8 };
+            (r, bit)
+        } else {
+            let r = point_add(&a, g);
+            (r, 0u8)
+        };
+        steps.push(StepPts {
+            lambda: step_lambda(&a, g, op),
+            a: a.clone(),
+            g: g.clone(),
+            round: round as u8,
+            op,
+            next_op,
+            r: r.clone(),
+        });
+        // Stay on the same round when an add follows; otherwise move one bit down.
+        let round_sent = round - (1 - next_op as i64);
+        a = r;
+        if round_sent < 0 {
+            break;
+        }
+        round = round_sent;
+        op = next_op;
+    }
+
+    (steps, a)
+}
+
+// =========================================================================
+// k256-backed fast path: projective double-and-add replay + batch inversion.
+//
+// The witness generator is untrusted (the ECDAS chip re-proves every step), so
+// any audited arithmetic is sound here. We replay the schedule in k256
+// projective coordinates (no per-op inversion), `batch_normalize` all points to
+// affine in one shot, and batch-invert the slope denominators — replacing the
+// ~2*len_k Fermat inversions of the reference with two batched inversions.
+// =========================================================================
+
+use k256::elliptic_curve::ff::PrimeField as _;
+use k256::elliptic_curve::group::Curve as _;
+use k256::elliptic_curve::sec1::{FromEncodedPoint, ToEncodedPoint};
+use k256::{AffinePoint as K256Affine, EncodedPoint, FieldElement, ProjectivePoint, Scalar};
+
+/// 32 big-endian bytes of a value known to fit in 256 bits (left zero-padded).
+fn be32(v: &BigUint) -> [u8; 32] {
+    let b = v.to_bytes_be();
+    debug_assert!(b.len() <= 32, "value exceeds 256 bits");
+    let mut out = [0u8; 32];
+    // Right-align the significant bytes; the leading bytes stay zero.
+    out[32 - b.len()..].copy_from_slice(&b);
+    out
+}
+
+/// Converts a `BigUint` into a k256 field element. Panics if the value is not `< p`.
+fn fe_from_biguint(v: &BigUint) -> FieldElement {
+    Option::from(FieldElement::from_bytes(&be32(v).into()))
+        .expect("ECSM: field element must be < p")
+}
+
+/// Converts a k256 field element back into a `BigUint` (big-endian byte round trip).
+fn biguint_from_fe(f: &FieldElement) -> BigUint {
+    BigUint::from_bytes_be(&f.to_bytes())
+}
+
+/// Converts an [`AffinePoint`] into a k256 affine point. Panics if it is not on the curve.
+fn to_k256_affine(a: &AffinePoint) -> K256Affine {
+    let ep = EncodedPoint::from_affine_coordinates(&be32(&a.x).into(), &be32(&a.y).into(), false);
+    Option::from(K256Affine::from_encoded_point(&ep)).expect("ECSM: point must be on the curve")
+}
+
+/// Converts a k256 affine point into an [`AffinePoint`]. Panics if the uncompressed
+/// encoding carries no coordinates.
+fn from_k256_affine(p: &K256Affine) -> AffinePoint {
+    let ep = p.to_encoded_point(false);
+    AffinePoint {
+        x: BigUint::from_bytes_be(ep.x().expect("ECSM: affine point has x")),
+        y: BigUint::from_bytes_be(ep.y().expect("ECSM: affine point has y")),
+    }
+}
+
+/// Montgomery's batch inversion over `FieldElement`: one real inversion total.
+///
+/// Every input must be nonzero; a zero element makes the running product zero and the
+/// single inversion panics.
+fn batch_invert(xs: &[FieldElement]) -> Vec<FieldElement> {
+    let n = xs.len();
+    // prefix[i] = xs[0] * ... * xs[i-1]  (prefix[0] = 1)
+    let mut prefix = Vec::with_capacity(n);
+    let mut acc = FieldElement::ONE;
+    for x in xs {
+        prefix.push(acc);
+        acc *= *x;
+    }
+    let mut inv =
+        Option::<FieldElement>::from(acc.invert()).expect("ECSM: batch denominator is nonzero");
+    let mut out = vec![FieldElement::ONE; n];
+    // Walk backwards: `inv` holds (xs[0] * ... * xs[i])^-1 at the top of iteration `i`.
+    for i in (0..n).rev() {
+        out[i] = prefix[i] * inv;
+        inv *= xs[i];
+    }
+    out
+}
+
+/// The double-and-add schedule for `k`: one `(round, op, next_op)` per ECDAS row.
+/// Pure bit logic (data-independent of point values), identical control flow to
+/// the reference replay.
+fn schedule(k: &BigUint) -> Vec<(u8, u8, u8)> {
+    let m = msb_position(k) as i64;
+    let mut sched = Vec::new();
+    // Signed so that `k == 1` (m == 0) starts at -1 and yields an empty schedule.
+    let mut round: i64 = m - 1;
+    let mut op: u8 = 0;
+    while round >= 0 {
+        let next_op = if op == 0 {
+            if k.bit(round as u64) { 1u8 } else { 0u8 }
+        } else {
+            0u8
+        };
+        sched.push((round as u8, op, next_op));
+        // Stay on the same round when an add follows; otherwise move one bit down.
+        let round_sent = round - (1 - next_op as i64);
+        if round_sent < 0 {
+            break;
+        }
+        round = round_sent;
+        op = next_op;
+    }
+    sched
+}
+
+/// Executor fast path: the x-coordinate of `k·g`, via k256's optimized scalar
+/// multiplication. Needs no step list or slopes, so it skips all witness work.
+/// `k` must be in `[1, N)` (guaranteed by `prepare`).
+pub fn scalar_mul_affine_x(k: &BigUint, g: &AffinePoint) -> BigUint {
+    let scalar = Option::<Scalar>::from(Scalar::from_repr(be32(k).into()))
+        .expect("ECSM: scalar k must be < N");
+    let g_proj = ProjectivePoint::from(to_k256_affine(g));
+    let r = (g_proj * scalar).to_affine();
+    from_k256_affine(&r).x
+}
+
+/// Replays the ECDAS double-and-add for `k·g` using k256 projective arithmetic and
+/// batched inversion. Produces the identical `StepPts` sequence as
+/// [`replay_double_and_add_reference`] (validated by the parity test), but with two
+/// batched inversions instead of one per double/add step.
+pub fn replay_double_and_add(k: &BigUint, g: &AffinePoint) -> (Vec<StepPts>, AffinePoint) {
+    let sched = schedule(k);
+    if sched.is_empty() {
+        return (Vec::new(), g.clone()); // k == 1: result is g, no steps
+    }
+    let n = sched.len();
+
+    // 1. projective replay (no inversions): record a and r at every step.
+    let g_proj = ProjectivePoint::from(to_k256_affine(g));
+    let mut a_proj = g_proj;
+    let mut points = Vec::with_capacity(2 * n); // [a_0..a_{n-1}, r_0..r_{n-1}]
+    let mut r_projs = Vec::with_capacity(n);
+    for &(_, op, _) in &sched {
+        let r_proj = if op == 0 {
+            a_proj.double()
+        } else {
+            a_proj + g_proj
+        };
+        points.push(a_proj);
+        r_projs.push(r_proj);
+        a_proj = r_proj;
+    }
+    points.extend_from_slice(&r_projs);
+
+    // 2. one batch_normalize for every a and r.
+    let mut affine = vec![K256Affine::IDENTITY; points.len()];
+    ProjectivePoint::batch_normalize(&points, &mut affine);
+    let a_aff: Vec<AffinePoint> = affine[..n].iter().map(from_k256_affine).collect();
+    let r_aff: Vec<AffinePoint> = affine[n..].iter().map(from_k256_affine).collect();
+
+    // 3. batch-invert all slope denominators (add: xG-xA, double: 2yA).
+    let gx_fe = fe_from_biguint(&g.x);
+    let gy_fe = fe_from_biguint(&g.y);
+    let denoms: Vec<FieldElement> = (0..n)
+        .map(|i| {
+            if sched[i].1 == 1 {
+                gx_fe - fe_from_biguint(&a_aff[i].x)
+            } else {
+                let ya = fe_from_biguint(&a_aff[i].y);
+                ya + ya
+            }
+        })
+        .collect();
+    let inv_denoms = batch_invert(&denoms);
+
+    // 4. slopes and StepPts.
+    let steps: Vec<StepPts> = (0..n)
+        .map(|i| {
+            let num = if sched[i].1 == 1 {
+                gy_fe - fe_from_biguint(&a_aff[i].y)
+            } else {
+                let x2 = {
+                    let xa = fe_from_biguint(&a_aff[i].x);
+                    xa * xa
+                };
+                x2 + x2 + x2 // 3 xA^2
+            };
+            StepPts {
+                a: a_aff[i].clone(),
+                g: g.clone(),
+                round: sched[i].0,
+                op: sched[i].1,
+                next_op: sched[i].2,
+                r: r_aff[i].clone(),
+                lambda: biguint_from_fe(&(num * inv_denoms[i])),
+            }
+        })
+        .collect();
+
+    // The last step's result is the final accumulator, i.e. k·g.
+    let result = r_aff[n - 1].clone();
+    (steps, result)
+}
+
+#[cfg(test)]
+mod parity_tests {
+    use super::*;
+    use crate::n;
+    use num_bigint::BigUint;
+
+    /// secp256k1 generator (even y), via the canonical y recovery.
+    fn generator() -> AffinePoint {
+        let gx = BigUint::parse_bytes(
+            b"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798",
+            16,
+        )
+        .unwrap();
+        let gy = recover_y_canonical(&gx).expect("G on curve");
+        AffinePoint { x: gx, y: gy }
+    }
+
+    /// Parses big-endian hex digits into a `BigUint`.
+    fn be(hex: &[u8]) -> BigUint {
+        BigUint::parse_bytes(hex, 16).unwrap()
+    }
+
+    /// The k256 fast path must produce byte-identical `StepPts` (points + λ) and the
+    /// same final point as the BigUint reference, across small, structured, large and
+    /// near-order scalars. This pins the audited fast path to the spec-faithful reference.
+    #[test]
+    fn k256_replay_matches_reference() {
+        let g = generator();
+        let mut scalars: Vec<BigUint> = (1u64..40).map(BigUint::from).collect();
+        for &kv in &[
+            0xFFu64,
+            0x101,
+            0xABCD,
+            0xFFFF,
+            0x1_0000,
+            1 << 20,
+            123_456_789,
+            u64::MAX,
+        ] {
+            scalars.push(BigUint::from(kv));
+        }
+        // large 256-bit scalars (must stay < N) and the order boundary
+        scalars.push(be(
+            b"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF",
+        ));
+        scalars.push(be(
+            b"7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0",
+        ));
+        scalars.push(&n() / BigUint::from(2u8));
+        scalars.push(&n() - BigUint::from(1u8));
+
+        for k in scalars {
+            let (steps, result) = replay_double_and_add(&k, &g);
+            let (steps_ref, result_ref) = replay_double_and_add_reference(&k, &g);
+            assert_eq!(result, result_ref, "final point mismatch for k = {k}");
+            assert_eq!(steps, steps_ref, "step list mismatch for k = {k}");
+        }
+    }
+
+    /// The executor's fast path (`scalar_mul_affine_x`) and the prover's replay must agree
+    /// on `x(k·G)`: the executor writes it to guest memory and the prover proves it, so any
+    /// divergence would make a correct execution unprovable. They run through two distinct
+    /// k256 entry points (native scalar-mul vs projective double-and-add), so pin them here.
+    #[test]
+    fn executor_and_replay_agree_on_result_x() {
+        let g = generator();
+        let mut scalars: Vec<BigUint> = (1u64..40).map(BigUint::from).collect();
+        for &kv in &[0xFFu64, 0xABCD, 1 << 20, 123_456_789, u64::MAX] {
+            scalars.push(BigUint::from(kv));
+        }
+        scalars.push(&n() / BigUint::from(2u8));
+        scalars.push(&n() - BigUint::from(1u8));
+
+        for k in scalars {
+            let (_steps, result) = replay_double_and_add(&k, &g);
+            let exec_x = scalar_mul_affine_x(&k, &g);
+            assert_eq!(result.x, exec_x, "executor/replay x mismatch for k = {k}");
+        }
+    }
+}
diff --git a/crypto/ecsm/src/field.rs b/crypto/ecsm/src/field.rs
new file mode 100644
index 000000000..5e6c550b8
--- /dev/null
+++ b/crypto/ecsm/src/field.rs
@@ -0,0 +1,46 @@
+//! Arithmetic in the secp256k1 base field `F_p` with `p = 2^256 - 2^32 - 977`.
+//!
+//! Elements are stored as `BigUint` always reduced into `[0, p)`. This is reference
+//! arithmetic used to derive accelerator witnesses — it runs once per `ECALL`, never
+//! in a hot loop, so clarity is preferred over speed.
+
+use num_bigint::BigUint;
+
+use crate::p;
+
+/// An element of the secp256k1 base field, kept reduced into `[0, p)`.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Fp(pub BigUint);
+
+impl Fp {
+    /// Reduces an arbitrary value into the field.
+    pub fn new(v: BigUint) -> Self {
+        Fp(v % p())
+    }
+
+    /// Builds a field element from a `u64` (reduced mod `p`).
+    pub fn from_u64(v: u64) -> Self {
+        Fp(BigUint::from(v) % p())
+    }
+
+    /// `self + other mod p`. Both operands must already be reduced.
+    pub fn add(&self, other: &Fp) -> Fp {
+        Fp((&self.0 + &other.0) % p())
+    }
+
+    /// `self - other mod p`. Both operands must already be reduced.
+    pub fn sub(&self, other: &Fp) -> Fp {
+        // Adding `p` first keeps the unsigned subtraction from underflowing.
+        let t = &self.0 + p(); // in [p, 2p)
+        Fp((t - &other.0) % p())
+    }
+
+    /// `self * other mod p`. Both operands must already be reduced.
+    pub fn mul(&self, other: &Fp) -> Fp {
+        Fp((&self.0 * &other.0) % p())
+    }
+
+    /// Multiplicative inverse via Fermat's little theorem (`p` is prime): `self^(p-2)`.
+    /// Returns zero for a zero input (which never occurs for valid curve arithmetic).
+    pub fn inv(&self) -> Fp {
+        // self^(p-2) mod p; for self == 0 the power is 0, hence the zero result above.
+        Fp(self.0.modpow(&(p() - BigUint::from(2u32)), &p()))
+    }
+}
diff --git a/crypto/ecsm/src/lib.rs b/crypto/ecsm/src/lib.rs
new file mode 100644
index 000000000..56a948a12
--- /dev/null
+++ b/crypto/ecsm/src/lib.rs
@@ -0,0 +1,252 @@
+//! Reference secp256k1 scalar multiplication and ECSM-accelerator witness generation.
+//!
+//! This crate is shared by the executor (which needs `k·G`'s x-coordinate to write back
+//! to guest memory) and the prover (which replays the full double-and-add sequence to
+//! fill the ECSM / ECDAS / EC_SCALAR trace witnesses). Both entry points compute the same
+//! `k·G` over the audited `k256` curve arithmetic — the executor via `k256`'s scalar
+//! multiplication, the prover via a projective double-and-add replay — so the x-coordinate
+//! they write/prove agrees. It is also independent of the `yG` root: both recover the same
+//! canonical `yG` in `prepare`, and `k·P` and `k·(-P)` share an x.
+//!
+//! Curve point operations are delegated to the RustCrypto `k256` crate; witness generation
+//! replays the schedule in `k256` projective coordinates and batch-inverts the slope
+//! denominators, while `num-bigint` carries the coordinate/limb representation the trace
+//! needs. All of this runs once per `ECALL`, so it is not performance critical.
+//!
+//! Curve: secp256k1, `y^2 = x^3 + 7 mod p`, `p = 2^256 - 2^32 - 977`, order `N`.
+
+pub mod curve;
+pub mod field;
+pub mod witness;
+
+use num_bigint::BigUint;
+
+// Re-exports forming the crate's public surface alongside the items defined below.
+pub use curve::{AffinePoint, recover_y_canonical, replay_double_and_add};
+pub use witness::{EcdasStep, EcsmWitness, compute_witness};
+
+/// secp256k1 curve coefficient `b`.
+pub const B: u64 = 7;
+
+/// Prime field modulus `p = 2^256 - 2^32 - 977`, little-endian bytes.
+pub const P_BYTES: [u8; 32] = [
+    // Least significant byte first: 0x2F is the low byte of p.
+    0x2F, 0xFC, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+];
+
+/// Curve group order `N`, little-endian bytes.
+pub const N_BYTES: [u8; 32] = [
+    // Least significant byte first: 0x41 is the low byte of N.
+    0x41, 0x41, 0x36, 0xD0, 0x8C, 0x5E, 0xD2, 0xBF, 0x3B, 0xA0, 0x48, 0xAF, 0xE6, 0xDC, 0xAE, 0xBA,
+    0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+];
+
+/// The prime field modulus `p` as a `BigUint`.
+///
+/// Rebuilt from [`P_BYTES`] on every call.
+pub fn p() -> BigUint {
+    BigUint::from_bytes_le(&P_BYTES)
+}
+
+/// The curve order `N` as a `BigUint`.
+///
+/// Rebuilt from [`N_BYTES`] on every call.
+pub fn n() -> BigUint {
+    BigUint::from_bytes_le(&N_BYTES)
+}
+
+/// Errors that prevent a sound ECSM witness from existing for the given inputs.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum EcsmError {
+    /// `k == 0`: `0·G` is the point at infinity, which the accelerator cannot represent.
+    ScalarIsZero,
+    /// `k >= N`: outside the valid scalar range `[1, N)`.
+    ScalarOutOfRange,
+    /// `x^3 + b` is not a quadratic residue, so `xG` is not a valid x-coordinate.
+    NotOnCurve,
+    /// `xG >= p`: not a canonical field element. Reducing it silently would
+    /// diverge from the prover, whose `xR < p` range check makes a non-canonical
+    /// input unprovable (with `k = 1` the input is echoed back as `xR`).
+    CoordinateOutOfRange,
+}
+
+// Human-readable messages, one per variant.
+impl core::fmt::Display for EcsmError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            EcsmError::ScalarIsZero => write!(f, "ECSM scalar k must be non-zero"),
+            EcsmError::ScalarOutOfRange => write!(f, "ECSM scalar k must be < N"),
+            EcsmError::NotOnCurve => write!(f, "ECSM xG is not a valid curve x-coordinate"),
+            EcsmError::CoordinateOutOfRange => write!(f, "ECSM xG must be < p"),
+        }
+    }
+}
+
+impl std::error::Error for EcsmError {}
+
+/// Converts a `BigUint` to 32 little-endian bytes (zero-padded / truncated to 32).
+pub fn to_le_32(v: &BigUint) -> [u8; 32] {
+    // Debug-only guard: in release builds a wider value is not rejected, and the
+    // `resize` below silently drops every byte above index 31.
+    debug_assert!(v.bits() <= 256, "to_le_32: value exceeds 256 bits");
+    let mut bytes = v.to_bytes_le();
+    bytes.resize(32, 0);
+    let mut out = [0u8; 32];
+    out.copy_from_slice(&bytes[..32]);
+    out
+}
+
+/// Validates the scalar and recovers the generator point from `(xG, k)`.
+///
+/// Shared front-end for both entry points: checks `0 < k < N`, rebuilds `xG`, and recovers
+/// the canonical `yG`.
+///
+/// # Errors
+///
+/// Checked in this order: [`EcsmError::ScalarIsZero`], [`EcsmError::ScalarOutOfRange`],
+/// [`EcsmError::CoordinateOutOfRange`] (`xG >= p`), [`EcsmError::NotOnCurve`].
+pub(crate) fn prepare(
+    k_le: &[u8; 32],
+    xg_le: &[u8; 32],
+) -> Result<(BigUint, AffinePoint), EcsmError> {
+    let k = BigUint::from_bytes_le(k_le);
+    if k == BigUint::from(0u8) {
+        return Err(EcsmError::ScalarIsZero);
+    }
+    if k >= n() {
+        return Err(EcsmError::ScalarOutOfRange);
+    }
+    let xg = BigUint::from_bytes_le(xg_le);
+    // Reject non-canonical coordinates explicitly so they are reported as out of range
+    // rather than as "not on curve".
+    if xg >= p() {
+        return Err(EcsmError::CoordinateOutOfRange);
+    }
+    let yg = recover_y_canonical(&xg).ok_or(EcsmError::NotOnCurve)?;
+    Ok((k, AffinePoint { x: xg, y: yg }))
+}
+
+/// Computes the x-coordinate of `k·G` over secp256k1, given `k` and `xG` as little-endian
+/// 32-byte values. This is the executor's entry point — it writes the returned bytes back
+/// to guest memory at `addr_xR`.
+///
+/// # Errors
+///
+/// Returns whatever error `prepare` reports for the inputs.
+pub fn scalar_mul_x(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<[u8; 32], EcsmError> {
+    let (k, g) = prepare(k_le, xg_le)?;
+    Ok(to_le_32(&curve::scalar_mul_affine_x(&k, &g)))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Parses a big-endian hex string into a `BigUint`.
+    fn be_hex(s: &str) -> BigUint {
+        BigUint::parse_bytes(s.as_bytes(), 16).unwrap()
+    }
+
+    // secp256k1 generator G.
+    const GX_HEX: &str = "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798";
+    const GY_HEX: &str = "483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8";
+
+    /// `GX_HEX` parsed as a `BigUint`.
+    fn gx() -> BigUint {
+        be_hex(GX_HEX)
+    }
+
+    /// Pins the little-endian byte constants against the big-endian hex values of `p` and `N`.
+    #[test]
+    fn constants_match_known_secp256k1_values() {
+        assert_eq!(
+            p(),
+            be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F")
+        );
+        assert_eq!(
+            n(),
+            be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141")
+        );
+        // p ≡ 3 mod 4 (a known secp256k1 property).
+        assert_eq!(&p() % 4u32, BigUint::from(3u8));
+    }
+
+    #[test]
+    fn generator_is_on_curve_and_y_is_canonical() {
+        // Gy ends in 0xB8 (even), so the canonical (even) root is Gy itself.
+        let y = recover_y_canonical(&gx()).expect("G is on the curve");
+        assert_eq!(y, be_hex(GY_HEX));
+        assert!(!y.bit(0), "canonical root must be even");
+    }
+
+    #[test]
+    fn recover_y_handles_residues_and_non_residues() {
+        // Roughly half of all x are non-residues; scan a small range and check both
+        // branches deterministically: every recovered y is even and on the curve, and at
+        // least one x has no valid y (the `None` path).
+        let mut saw_none = false;
+        let mut saw_some = false;
+        for x in 1u32..40 {
+            let xb = BigUint::from(x);
+            match recover_y_canonical(&xb) {
+                Some(y) => {
+                    saw_some = true;
+                    assert!(!y.bit(0), "recovered y must be even");
+                    // y^2 == x^3 + b mod p
+                    let lhs = (&y * &y) % p();
+                    let rhs = (&xb * &xb % p() * &xb + BigUint::from(B)) % p();
+                    assert_eq!(lhs, rhs);
+                }
+                None => saw_none = true,
+            }
+        }
+        assert!(
+            saw_some && saw_none,
+            "expected both residues and non-residues in range"
+        );
+    }
+
+    #[test]
+    fn scalar_mul_one_is_identity() {
+        // k = 1 must echo the input x-coordinate back unchanged.
+        let k = to_le_32(&BigUint::from(1u8));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), xg);
+    }
+
+    #[test]
+    fn scalar_mul_two_matches_known_2g() {
+        // x(2G) for secp256k1.
+        let expected = be_hex("C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE5");
+        let k = to_le_32(&BigUint::from(2u8));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), to_le_32(&expected));
+    }
+
+    #[test]
+    fn scalar_mul_three_matches_known_3g() {
+        // x(3G) for secp256k1; the smallest scalar whose schedule contains an add step.
+        let expected = be_hex("F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9");
+        let k = to_le_32(&BigUint::from(3u8));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), to_le_32(&expected));
+    }
+
+    #[test]
+    fn scalar_mul_n_minus_one_shares_x_with_g() {
+        // (N-1)·G = -G, which has the same x-coordinate as G.
+        let k = to_le_32(&(n() - BigUint::from(1u8)));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), xg);
+    }
+
+    #[test]
+    fn rejects_zero_and_out_of_range_scalars() {
+        // Both ends of the valid range [1, N): k = 0 and k = N.
+        let xg = to_le_32(&gx());
+        assert_eq!(
+            scalar_mul_x(&to_le_32(&BigUint::from(0u8)), &xg),
+            Err(EcsmError::ScalarIsZero)
+        );
+        assert_eq!(
+            scalar_mul_x(&to_le_32(&n()), &xg),
+            Err(EcsmError::ScalarOutOfRange)
+        );
+    }
+
+    #[test]
+    fn rejects_non_canonical_xg() {
+        // xG = p and xG = p + 1 (the alias of x = 1) must be rejected, not
+        // silently reduced: with k = 1 the input bytes would be echoed back as
+        // xR, which the prover's xR < p range check cannot prove.
+        let k = to_le_32(&BigUint::from(1u8));
+        for delta in [0u8, 1] {
+            assert_eq!(
+                scalar_mul_x(&k, &to_le_32(&(p() + BigUint::from(delta)))),
+                Err(EcsmError::CoordinateOutOfRange),
+                "xG = p + {delta} must be rejected"
+            );
+        }
+        // p − 1 is below the bound, so it must NOT hit the canonicity check
+        // (it is not on the curve, which is a different error).
+        assert_eq!(
+            scalar_mul_x(&k, &to_le_32(&(p() - BigUint::from(1u8)))),
+            Err(EcsmError::NotOnCurve)
+        );
+    }
+}
diff --git a/crypto/ecsm/src/witness.rs b/crypto/ecsm/src/witness.rs
new file mode 100644
index 000000000..44bc49d4c
--- /dev/null
+++ b/crypto/ecsm/src/witness.rs
@@ -0,0 +1,502 @@
+//!
ECSM / ECDAS witness generation. +//! +//! For one `ECALL`, the prover must fill the byte-limb witnesses that the ECSM and ECDAS +//! chips constrain: the `yG` reconstruction, the scalar range data, and — per double/add +//! step — the slope `λ`, three quotients, and three carry arrays. This module computes all +//! of them by literally reproducing the spec's limb-convolution recurrences, so the values +//! it emits satisfy the AIR constraints by construction. +//! +//! ## Limb-convolution carries +//! +//! Each "`x ≡ y mod p`" relation is expressed in the spec as a 512-bit integer identity +//! `LHS − RHS = 0`, written limb-by-limb (8-bit limbs) with a chain of carries: +//! `2^8·c_i = c_{i-1} + S_i`, `c_{-1} = 0`, closing with `c_63 = 0` (see `ecsm.typ` +//! "Discussing the carries"). `S_i` is the coefficient of `2^{8i}` in `LHS − RHS` +//! (a sum of byte products — the convolution — plus single-limb terms). Carries can be +//! negative; the chip range-checks `c_i + offset` as a halfword. We reproduce the exact +//! integer recurrence here; the prover converts the resulting integers to field elements. + +use num_bigint::{BigInt, BigUint}; +use num_traits::{Signed, Zero}; + +use crate::curve::{StepPts, replay_double_and_add}; +use crate::{B, EcsmError, P_BYTES, n, p, prepare, to_le_32}; + +/// Full ECSM-chip witness for one scalar multiplication (one ECSM row). 
+#[derive(Debug, Clone)]
+pub struct EcsmWitness {
+    /// Base point coordinates, 32 little-endian bytes each.
+    pub x_g: [u8; 32],
+    pub y_g: [u8; 32],
+    /// Scalar, 32 little-endian bytes.
+    pub k: [u8; 32],
+    /// `x2 = xG^2 mod p`
+    pub x2: [u8; 32],
+    /// quotient for the `x2` relation
+    pub q0: [u8; 32],
+    /// carries for the `x2` relation
+    pub c0: [i64; 64],
+    /// quotient for the `yG` relation (33 bytes; byte 32 is a single bit)
+    pub q1: [u8; 33],
+    /// carries for the `yG` relation
+    pub c1: [i64; 64],
+    /// `(k - N) mod 2^256`
+    pub k_sub_n: [u8; 32],
+    /// `(xR - p) mod 2^256`
+    pub x_r_sub_p: [u8; 32],
+    /// position of the most significant set bit of `k`
+    pub len_k: u8,
+    /// Result point `k·G`, 32 little-endian bytes per coordinate.
+    pub x_r: [u8; 32],
+    pub y_r: [u8; 32],
+    /// the double/add steps (one ECDAS row each; empty when `k == 1`)
+    pub steps: Vec<EcdasStep>,
+}
+
+/// Full ECDAS-chip witness for one double/add step (one ECDAS row).
+#[derive(Debug, Clone)]
+pub struct EcdasStep {
+    pub x_a: [u8; 32],
+    pub y_a: [u8; 32],
+    pub x_g: [u8; 32],
+    pub y_g: [u8; 32],
+    pub round: u8,
+    /// 0 = double, 1 = add
+    pub op: u8,
+    /// op-flag of the next step (1 ⇒ next row adds at this round)
+    pub next_op: u8,
+    pub lambda: [u8; 32],
+    pub x_r: [u8; 32],
+    pub y_r: [u8; 32],
+    /// quotient for the `λ` relation (33 bytes)
+    pub q0: [u8; 33],
+    /// quotient for the `xR` relation (33 bytes)
+    pub q1: [u8; 33],
+    /// quotient for the `yR` relation (33 bytes)
+    pub q2: [u8; 33],
+    pub c0: [i64; 64],
+    pub c1: [i64; 64],
+    pub c2: [i64; 64],
+}
+
+// =========================================================================
+// Limb helpers
+// =========================================================================
+
+/// Zero-extends a little-endian byte slice (≤ 64 bytes) to 64 `i128` limbs.
+///
+/// Panics on a slice longer than 64 bytes (out-of-bounds write).
+fn ext64(bytes: &[u8]) -> [i128; 64] {
+    let mut a = [0i128; 64];
+    for (i, &b) in bytes.iter().enumerate() {
+        a[i] = b as i128;
+    }
+    a
+}
+
+/// Convolution `Σ_{j=0}^{i} a[j]·b[i-j]`.
+fn conv(a: &[i128; 64], b: &[i128; 64], i: usize) -> i128 {
+    // Coefficient of limb `i` in the product a·b; `i` must be < 64.
+    let mut s = 0i128;
+    for j in 0..=i {
+        s += a[j] * b[i - j];
+    }
+    s
+}
+
+/// Computes the 64 carries from per-limb terms via `2^8·c_i = c_{i-1} + terms_i`,
+/// `c_{-1} = 0`, asserting exact divisibility at every limb and the closing `c_63 = 0`.
+///
+/// These asserts catch any transcription error in the `terms` builders: for valid inputs
+/// the relation `LHS − RHS = 0` holds exactly, so every partial sum is divisible by 256.
+fn limb_carries(terms: &[i128; 64]) -> [i64; 64] {
+    let mut c = [0i64; 64];
+    // Running carry c_{i-1}, kept in i128 so the per-limb sum cannot overflow.
+    let mut carry: i128 = 0;
+    for i in 0..64 {
+        let s = carry + terms[i];
+        assert!(s % 256 == 0, "ECSM witness: limb {i} not divisible by 256");
+        carry = s / 256;
+        c[i] = carry as i64;
+    }
+    assert!(c[63] == 0, "ECSM witness: closing carry c_63 must be 0");
+    c
+}
+
+// =========================================================================
+// Per-relation carry builders (mirror the spec TOML polys exactly)
+// =========================================================================
+
+/// ECSM `x2` relation: `xG^2 − x2 − q0·p = 0`.
+fn carries_x2(xg: &[i128; 64], x2: &[i128; 64], q0: &[i128; 64], pp: &[i128; 64]) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        // Limb `i` of xG·xG − x2 − q0·p.
+        terms[i] = conv(xg, xg, i) - x2[i] - conv(q0, pp, i);
+    }
+    limb_carries(&terms)
+}
+
+/// ECSM `yG` relation: `yG^2 + p^2 − xG·x2 − b − q1·p = 0`.
+fn carries_yg(
+    yg: &[i128; 64],
+    pp: &[i128; 64],
+    x2: &[i128; 64],
+    xg: &[i128; 64],
+    q1: &[i128; 64],
+    b: &[i128; 64],
+) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        // Limb `i` of yG·yG + p·p − x2·xG − q1·p − b.
+        terms[i] = conv(yg, yg, i) + conv(pp, pp, i) - conv(x2, xg, i) - conv(q1, pp, i) - b[i];
+    }
+    limb_carries(&terms)
+}
+
+/// ECDAS `λ` relation:
+/// `op·(λ(xG−xA) − yG + yA) + (1−op)(2λyA − 3xA²) + (r − q0)p = 0`.
+#[allow(clippy::too_many_arguments)]
+fn carries_lambda(
+    op: u8,
+    lam: &[i128; 64],
+    xg: &[i128; 64],
+    xa: &[i128; 64],
+    ya: &[i128; 64],
+    yg: &[i128; 64],
+    r: &[i128; 64],
+    pp: &[i128; 64],
+    q0: &[i128; 64],
+) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        // `op` is 0 or 1, so only one of the two spec branches contributes per row.
+        let branch = if op == 1 {
+            // op · (Σ_j λ_j (xG_{i-j} − xA_{i-j}) + (yA_i − yG_i))
+            let mut s = ya[i] - yg[i];
+            for j in 0..=i {
+                s += lam[j] * (xg[i - j] - xa[i - j]);
+            }
+            s
+        } else {
+            // (1−op) · Σ_j (2 λ_j yA_{i-j} − 3 xA_j xA_{i-j})
+            let mut s = 0i128;
+            for j in 0..=i {
+                s += 2 * lam[j] * ya[i - j] - 3 * xa[j] * xa[i - j];
+            }
+            s
+        };
+        // (r − q0)·p, limb `i`.
+        terms[i] = branch + conv(r, pp, i) - conv(q0, pp, i);
+    }
+    limb_carries(&terms)
+}
+
+/// ECDAS `xR` relation:
+/// `λ² − xA − xG − xR − (1−op)(xA − xG) + (r − q1)p = 0`.
+#[allow(clippy::too_many_arguments)]
+fn carries_xr(
+    op: u8,
+    lam: &[i128; 64],
+    xa: &[i128; 64],
+    xg: &[i128; 64],
+    xr: &[i128; 64],
+    r: &[i128; 64],
+    pp: &[i128; 64],
+    q1: &[i128; 64],
+) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        // For a double the (1−op) term turns `− xA − xG` into `− 2·xA`.
+        let op_term = if op == 0 { xa[i] - xg[i] } else { 0 };
+        terms[i] =
+            conv(lam, lam, i) - xa[i] - xg[i] - xr[i] - op_term + conv(r, pp, i) - conv(q1, pp, i);
+    }
+    limb_carries(&terms)
+}
+
+/// ECDAS `yR` relation: `λ(xA − xR) − yA − yR + (r − q2)p = 0`.
+#[allow(clippy::too_many_arguments)] +fn carries_yr( + lam: &[i128; 64], + xa: &[i128; 64], + xr: &[i128; 64], + ya: &[i128; 64], + yr: &[i128; 64], + r: &[i128; 64], + pp: &[i128; 64], + q2: &[i128; 64], +) -> [i64; 64] { + let mut terms = [0i128; 64]; + for i in 0..64 { + let mut conv_lam = 0i128; + for j in 0..=i { + conv_lam += lam[j] * (xa[i - j] - xr[i - j]); + } + terms[i] = conv_lam - ya[i] - yr[i] + conv(r, pp, i) - conv(q2, pp, i); + } + limb_carries(&terms) +} + +// ========================================================================= +// BigInt helpers +// ========================================================================= + +/// Little-endian 33 bytes of a non-negative value that fits in 264 bits. +fn to_le_33(v: &BigUint) -> [u8; 33] { + let mut bytes = v.to_bytes_le(); + assert!(bytes.len() <= 33, "ECSM witness: quotient exceeds 33 bytes"); + bytes.resize(33, 0); + let mut out = [0u8; 33]; + out.copy_from_slice(&bytes[..33]); + out +} + +/// `r + numerator / p`, where `numerator` must be divisible by `p`. Asserts divisibility +/// and that the result is non-negative (guaranteed by the spec quotient ranges). +fn shifted_quotient(numerator: &BigInt, p_big: &BigInt, r_big: &BigInt) -> BigUint { + assert!( + (numerator % p_big).is_zero(), + "ECSM witness: numerator not divisible by p" + ); + let q = r_big + numerator / p_big; + assert!( + !q.is_negative(), + "ECSM witness: quotient unexpectedly negative" + ); + q.to_biguint().expect("non-negative") +} + +// ========================================================================= +// Witness construction +// ========================================================================= + +/// Computes the full ECSM/ECDAS witness for `k·G` over secp256k1, given `k` and `xG` as +/// little-endian 32-byte values. This is the prover's entry point. 
+pub fn compute_witness(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result { + let (k, g) = prepare(k_le, xg_le)?; + + let p_big = BigInt::from(p()); + let r_big = BigInt::from(BigUint::from(3u8) * p()); // r = 3p + let r_bytes_33 = to_le_33(&(BigUint::from(3u8) * p())); + + // Common zero-extended constants. + let pp = ext64(&P_BYTES); + let r_ext = ext64(&r_bytes_33); + let b_bytes = { + let mut a = [0u8; 32]; + a[0] = B as u8; + a + }; + let b_ext = ext64(&b_bytes); + + // --- ECSM: x2 = xG^2 mod p, quotient q0 --- + let xg_sq = &g.x * &g.x; + let x2_big = &xg_sq % p(); + let q0_big = (&xg_sq - &x2_big) / p(); // exact + let xg_b = to_le_32(&g.x); + let yg_b = to_le_32(&g.y); + let x2_b = to_le_32(&x2_big); + let q0_b = to_le_32(&q0_big); + let c0 = carries_x2(&ext64(&xg_b), &ext64(&x2_b), &ext64(&q0_b), &pp); + + // --- ECSM: yG relation, quotient q1 = (yG^2 − xG·x2 − b)/p + p --- + let num_yg = BigInt::from(&g.y * &g.y) - BigInt::from(&g.x * &x2_big) - BigInt::from(B); + let q1_big = shifted_quotient(&num_yg, &p_big, &p_big); + let q1_b = to_le_33(&q1_big); + let c1 = carries_yg( + &ext64(&yg_b), + &pp, + &ext64(&x2_b), + &ext64(&xg_b), + &ext64(&q1_b), + &b_ext, + ); + + // --- scalar range data --- + let len_k = crate::curve::msb_position(&k) as u8; + let two_256 = BigUint::from(1u8) << 256u32; + let k_sub_n = to_le_32(&((&two_256 + &k) - n())); // k < N + + // --- double/add replay --- + let (steps_pts, result) = replay_double_and_add(&k, &g); + let x_r = to_le_32(&result.x); + let y_r = to_le_32(&result.y); + let x_r_sub_p = to_le_32(&((&two_256 + &result.x) - p())); + + let steps = steps_pts + .iter() + .map(|s| build_step(s, &p_big, &r_big, &r_ext, &pp)) + .collect(); + + Ok(EcsmWitness { + x_g: xg_b, + y_g: yg_b, + k: *k_le, + x2: x2_b, + q0: q0_b, + c0, + q1: q1_b, + c1, + k_sub_n, + x_r_sub_p, + len_k, + x_r, + y_r, + steps, + }) +} + +/// Builds one ECDAS step witness (λ, quotients, carries) from a point-level step. 
+fn build_step( + s: &StepPts, + p_big: &BigInt, + r_big: &BigInt, + r_ext: &[i128; 64], + pp: &[i128; 64], +) -> EcdasStep { + // λ is precomputed (batched) during the double-and-add replay. + let lam_b = to_le_32(&s.lambda); + let xa_b = to_le_32(&s.a.x); + let ya_b = to_le_32(&s.a.y); + let xg_b = to_le_32(&s.g.x); + let yg_b = to_le_32(&s.g.y); + let xr_b = to_le_32(&s.r.x); + let yr_b = to_le_32(&s.r.y); + + let (lam_ext, xa_ext, ya_ext, xg_ext, yg_ext, xr_ext, yr_ext) = ( + ext64(&lam_b), + ext64(&xa_b), + ext64(&ya_b), + ext64(&xg_b), + ext64(&yg_b), + ext64(&xr_b), + ext64(&yr_b), + ); + + let lam_i = BigInt::from(s.lambda.clone()); + let xa_i = BigInt::from(s.a.x.clone()); + let ya_i = BigInt::from(s.a.y.clone()); + let xg_i = BigInt::from(s.g.x.clone()); + let yg_i = BigInt::from(s.g.y.clone()); + let xr_i = BigInt::from(s.r.x.clone()); + let yr_i = BigInt::from(s.r.y.clone()); + + // q0: λ relation numerator. + let num0 = if s.op == 1 { + (&xg_i - &xa_i) * &lam_i - &yg_i + &ya_i + } else { + 2 * &lam_i * &ya_i - 3 * &xa_i * &xa_i + }; + let q0_big = shifted_quotient(&num0, p_big, r_big); + let q0_b = to_le_33(&q0_big); + + // q1: xR relation numerator λ² − xA − xG − xR + (1−op)(xG − xA). + let mut num1 = &lam_i * &lam_i - &xa_i - &xg_i - &xr_i; + if s.op == 0 { + num1 += &xg_i - &xa_i; + } + let q1_big = shifted_quotient(&num1, p_big, r_big); + let q1_b = to_le_33(&q1_big); + + // q2: yR relation numerator λ(xA − xR) − yA − yR. 
+ let num2 = &lam_i * (&xa_i - &xr_i) - &ya_i - &yr_i; + let q2_big = shifted_quotient(&num2, p_big, r_big); + let q2_b = to_le_33(&q2_big); + + let c0 = carries_lambda( + s.op, + &lam_ext, + &xg_ext, + &xa_ext, + &ya_ext, + &yg_ext, + r_ext, + pp, + &ext64(&q0_b), + ); + let c1 = carries_xr( + s.op, + &lam_ext, + &xa_ext, + &xg_ext, + &xr_ext, + r_ext, + pp, + &ext64(&q1_b), + ); + let c2 = carries_yr( + &lam_ext, + &xa_ext, + &xr_ext, + &ya_ext, + &yr_ext, + r_ext, + pp, + &ext64(&q2_b), + ); + + EcdasStep { + x_a: xa_b, + y_a: ya_b, + x_g: xg_b, + y_g: yg_b, + round: s.round, + op: s.op, + next_op: s.next_op, + lambda: lam_b, + x_r: xr_b, + y_r: yr_b, + q0: q0_b, + q1: q1_b, + q2: q2_b, + c0, + c1, + c2, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::scalar_mul_x; + + fn gx_le() -> [u8; 32] { + let gx = BigUint::parse_bytes( + b"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", + 16, + ) + .unwrap(); + to_le_32(&gx) + } + + /// Drives `compute_witness` (whose internal asserts validate every carry/quotient) + /// across many scalars, and cross-checks the result against the reference scalar mul. 
+ #[test] + fn witness_is_self_consistent_for_many_scalars() { + let gx = gx_le(); + // small scalars plus bit patterns that exercise add/double scheduling + let scalars: &[u64] = &[1, 2, 3, 4, 5, 7, 8, 0xFF, 0x101, 0xABCD, 0xFFFF, 123456789]; + for &kv in scalars { + let k = to_le_32(&BigUint::from(kv)); + let w = compute_witness(&k, &gx).expect("witness"); + // final point matches reference + assert_eq!(w.x_r, scalar_mul_x(&k, &gx).unwrap(), "k = {kv}"); + // len_k is the true MSB position + assert_eq!(w.len_k as u32, 63 - (kv.leading_zeros()), "k = {kv}"); + } + } + + #[test] + fn k_one_has_no_ecdas_steps() { + let w = compute_witness(&to_le_32(&BigUint::from(1u8)), &gx_le()).unwrap(); + assert!(w.steps.is_empty()); + assert_eq!(w.x_r, w.x_g); // 1·G = G + assert_eq!(w.len_k, 0); + } + + #[test] + fn ecdas_step_schedule_matches_double_and_add() { + // k = 5 = 0b101: double(G)->2G [bit1=0], double(2G)->4G [bit0=1], add(4G,G)->5G. + let w = compute_witness(&to_le_32(&BigUint::from(5u8)), &gx_le()).unwrap(); + assert_eq!(w.len_k, 2); + let ops: Vec<(u8, u8, u8)> = w.steps.iter().map(|s| (s.round, s.op, s.next_op)).collect(); + assert_eq!(ops, vec![(1, 0, 0), (0, 0, 1), (0, 1, 0)]); + } + + #[test] + fn witness_works_near_curve_order() { + let gx = gx_le(); + let w = compute_witness(&to_le_32(&(n() - BigUint::from(1u8))), &gx).unwrap(); + assert_eq!(w.x_r, gx); // (N-1)·G = -G shares x with G + assert_eq!(w.len_k, 255); + } +} diff --git a/executor/Cargo.toml b/executor/Cargo.toml index d03fcd15c..280d3ba6b 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] thiserror = "1.0.68" rustc-demangle = "0.1" +ecsm = { path = "../crypto/ecsm" } [dev-dependencies] serde = { version = "1.0", features = ["derive"] } diff --git a/executor/programs/asm/test_ecsm.s b/executor/programs/asm/test_ecsm.s new file mode 100644 index 000000000..670eae487 --- /dev/null +++ b/executor/programs/asm/test_ecsm.s @@ -0,0 +1,46 @@ 
+ .attribute 5, "rv64i2p1_m2p0_zmmul1p0" +.Lfunc_end0: + .globl main +main: + # Stack layout (96 bytes): xG at sp+0, k at sp+32, xR at sp+64. + addi sp, sp, -96 + + # xG = secp256k1 Gx, little-endian (4 doublewords). + li t0, 0x59F2815B16F81798 + sd t0, 0(sp) + li t0, 0x029BFCDB2DCE28D9 + sd t0, 8(sp) + li t0, 0x55A06295CE870B07 + sd t0, 16(sp) + li t0, 0x79BE667EF9DCBBAC + sd t0, 24(sp) + + # k = 5 (little-endian); exercises double, double, add. + li t0, 5 + sd t0, 32(sp) + sd zero, 40(sp) + sd zero, 48(sp) + sd zero, 56(sp) + + # ECSM ecall: a0 = &xR, a1 = &xG, a2 = &k, a7 = -3. + addi a0, sp, 64 + addi a1, sp, 0 + addi a2, sp, 32 + li a7, -3 + ecall + + # Commit the 32-byte result xR so the test can check it equals x(5G). + # Commit syscall: a0 = fd(1), a1 = buf_addr, a2 = count, a7 = 64. + li a0, 1 + addi a1, sp, 64 + li a2, 32 + li a7, 64 + ecall + + # Restore stack and halt. + addi sp, sp, 96 + li a0, 0 + li a7, 93 + ecall +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/executor/programs/asm/test_ecsm_multi.s b/executor/programs/asm/test_ecsm_multi.s new file mode 100644 index 000000000..67aff4021 --- /dev/null +++ b/executor/programs/asm/test_ecsm_multi.s @@ -0,0 +1,71 @@ + .attribute 5, "rv64i2p1_m2p0_zmmul1p0" +.Lfunc_end0: + .globl main +main: + # Stack layout (96 bytes): xG at sp+0, k at sp+32, xR at sp+64. + addi sp, sp, -96 + + # xG = secp256k1 Gx, little-endian (written once; reused by all calls). + li t0, 0x59F2815B16F81798 + sd t0, 0(sp) + li t0, 0x029BFCDB2DCE28D9 + sd t0, 8(sp) + li t0, 0x55A06295CE870B07 + sd t0, 16(sp) + li t0, 0x79BE667EF9DCBBAC + sd t0, 24(sp) + + # k's high doublewords stay zero for all calls; only k[0] changes. 
+ sd zero, 40(sp) + sd zero, 48(sp) + sd zero, 56(sp) + + # --- call 1: k = 1 (no ECDAS steps; start/final tuples cancel directly) --- + li t0, 1 + sd t0, 32(sp) + addi a0, sp, 64 + addi a1, sp, 0 + addi a2, sp, 32 + li a7, -3 + ecall + li a0, 1 + addi a1, sp, 64 + li a2, 32 + li a7, 64 + ecall + + # --- call 2: k = 5 (double, double, add) --- + li t0, 5 + sd t0, 32(sp) + addi a0, sp, 64 + addi a1, sp, 0 + addi a2, sp, 32 + li a7, -3 + ecall + li a0, 1 + addi a1, sp, 64 + li a2, 32 + li a7, 64 + ecall + + # --- call 3: k = 0xABCDEF (24-bit; many doubles + several adds) --- + li t0, 0xABCDEF + sd t0, 32(sp) + addi a0, sp, 64 + addi a1, sp, 0 + addi a2, sp, 32 + li a7, -3 + ecall + li a0, 1 + addi a1, sp, 64 + li a2, 32 + li a7, 64 + ecall + + # Restore stack and halt. + addi sp, sp, 96 + li a0, 0 + li a7, 93 + ecall +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/executor/programs/rust/ecsm/.cargo/config.toml b/executor/programs/rust/ecsm/.cargo/config.toml new file mode 100644 index 000000000..ca99a3f45 --- /dev/null +++ b/executor/programs/rust/ecsm/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.riscv64im-lambda-vm-elf] +rustflags = [ + "--cfg", "getrandom_backend=\"custom\"", + "-C", "passes=lower-atomic" +] diff --git a/executor/programs/rust/ecsm/Cargo.lock b/executor/programs/rust/ecsm/Cargo.lock new file mode 100644 index 000000000..d0e71eeb0 --- /dev/null +++ b/executor/programs/rust/ecsm/Cargo.lock @@ -0,0 +1,331 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "const-default" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" + +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + +[[package]] +name = "ecsm" +version = "0.1.0" +dependencies = [ + "lambda-vm-syscalls", +] + +[[package]] +name = "embedded-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" +dependencies = [ + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", +] + +[[package]] +name = "embedded-hal" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89" + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "lambda-vm-syscalls" +version = "0.1.0" 
+dependencies = [ + "embedded-alloc", + "getrandom 0.2.17", + "getrandom 0.3.4", + "lazy_static", + "rand", + "riscv", + "thiserror", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linked_list_allocator" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897" + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "riscv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05cfa3f7b30c84536a9025150d44d26b8e1cc20ddf436448d74cd9591eefb25" +dependencies = [ + "critical-section", + "embedded-hal", + "paste", + "riscv-macros", + "riscv-pac", +] + +[[package]] +name = "riscv-macros" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d323d13972c1b104aa036bc692cd08b822c8bbf23d79a27c526095856499799" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "riscv-pac" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436" + +[[package]] +name = "rlsf" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a" +dependencies = [ + "cfg-if", + "const-default", + "libc", + "rustversion", + "svgbobdoc", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "svgbobdoc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" +dependencies = [ + "base64", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "zerocopy" +version = "0.8.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5361301a1d9e5dd94c524eb99365fbaed5b237e831d7f45e2ddea11ffe8627" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "422033a2245cb4b6ff8def11b2dfaf184a2ab2573f5af28082a163a68889af0e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] diff --git a/executor/programs/rust/ecsm/Cargo.toml b/executor/programs/rust/ecsm/Cargo.toml new file mode 100644 index 000000000..c99ea4e06 --- /dev/null +++ b/executor/programs/rust/ecsm/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] + +[package] +name = "ecsm" +version = "0.1.0" +edition = "2024" + +[dependencies] +lambda-vm-syscalls = { path = "../../../../syscalls" } diff --git a/executor/programs/rust/ecsm/src/main.rs b/executor/programs/rust/ecsm/src/main.rs new file mode 100644 index 000000000..709d4a4ae --- /dev/null +++ b/executor/programs/rust/ecsm/src/main.rs @@ -0,0 +1,20 @@ +use lambda_vm_syscalls as syscalls; + +/// Computes 5·G on secp256k1 via the ECSM precompile (Rust-guest path) and commits the +/// 32-byte x-coordinate as public output. +pub fn main() { + // secp256k1 Gx, given big-endian then reversed to little-endian for the precompile. 
+ let mut xg: [u8; 32] = [ + 0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + xg.reverse(); + + let mut k = [0u8; 32]; + k[0] = 5; + + let mut xr = [0u8; 32]; + syscalls::syscalls::ecsm_mul(&mut xr, &xg, &k); + syscalls::syscalls::commit(&xr); +} diff --git a/executor/src/tests/ecsm_tests.rs b/executor/src/tests/ecsm_tests.rs new file mode 100644 index 000000000..486865a64 --- /dev/null +++ b/executor/src/tests/ecsm_tests.rs @@ -0,0 +1,151 @@ +//! Tests for the ECSM (elliptic-curve scalar multiplication) syscall. + +use crate::vm::instruction::decoding::Instruction; +use crate::vm::instruction::execution::{ECSM_SYSCALL_NUMBER, ExecutionError}; +use crate::vm::memory::Memory; +use crate::vm::registers::Registers; + +/// secp256k1 generator x-coordinate, little-endian. +fn gx_le() -> [u8; 32] { + let mut be = [ + 0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + be.reverse(); + be +} + +fn write_u256_le(memory: &mut Memory, addr: u64, bytes: &[u8; 32]) { + for i in 0..4 { + let mut dw = [0u8; 8]; + dw.copy_from_slice(&bytes[i * 8..i * 8 + 8]); + memory + .store_doubleword(addr + (i as u64) * 8, u64::from_le_bytes(dw)) + .unwrap(); + } +} + +fn read_u256_le(memory: &Memory, addr: u64) -> [u8; 32] { + let mut out = [0u8; 32]; + for i in 0..4 { + let dw = memory.load_doubleword(addr + (i as u64) * 8).unwrap(); + out[i * 8..i * 8 + 8].copy_from_slice(&dw.to_le_bytes()); + } + out +} + +/// Runs the ECSM syscall with the given scalar (as little-endian bytes) and `xG`, +/// returning the `xR` written back to memory. 
+fn run_ecsm(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<[u8; 32], ExecutionError> { + let mut pc = 0; + let mut registers = Registers::default(); + let mut memory = Memory::default(); + + let addr_xr = 0x1000u64; + let addr_xg = 0x2000u64; + let addr_k = 0x3000u64; + write_u256_le(&mut memory, addr_xg, xg_le); + write_u256_le(&mut memory, addr_k, k_le); + + registers.write(17, ECSM_SYSCALL_NUMBER).unwrap(); + registers.write(10, addr_xr).unwrap(); + registers.write(11, addr_xg).unwrap(); + registers.write(12, addr_k).unwrap(); + + Instruction::EcallEbreak.run(&mut pc, &mut registers, &mut memory)?; + Ok(read_u256_le(&memory, addr_xr)) +} + +fn k_le(v: u64) -> [u8; 32] { + let mut k = [0u8; 32]; + k[..8].copy_from_slice(&v.to_le_bytes()); + k +} + +#[test] +fn ecsm_syscall_writes_correct_result() { + let xg = gx_le(); + // 1·G = G + assert_eq!(run_ecsm(&k_le(1), &xg).unwrap(), xg); + // Matches the reference scalar multiplication for several scalars. + for v in [2u64, 3, 5, 0xFFFF, 1_000_003] { + assert_eq!( + run_ecsm(&k_le(v), &xg).unwrap(), + ecsm::scalar_mul_x(&k_le(v), &xg).unwrap(), + "k = {v}" + ); + } +} + +#[test] +fn ecsm_syscall_rejects_zero_scalar() { + let err = run_ecsm(&k_le(0), &gx_le()).unwrap_err(); + assert!(matches!( + err, + ExecutionError::Ecsm(ecsm::EcsmError::ScalarIsZero) + )); +} + +#[test] +fn ecsm_syscall_rejects_non_canonical_xg() { + // xG = p + 1 (the alias of x = 1) must error, not silently reduce: with + // k = 1 the executor would echo the non-canonical bytes back as xR, which + // the prover's xR < p range check cannot prove. + let mut xg = ecsm::P_BYTES; + xg[0] += 1; // p ends in 0x2F little-endian, so no carry + let err = run_ecsm(&k_le(1), &xg).unwrap_err(); + assert!(matches!( + err, + ExecutionError::Ecsm(ecsm::EcsmError::CoordinateOutOfRange) + )); +} + +/// Runs the ECSM syscall with caller-chosen operand addresses, `xG = Gx` and `k = 5`. 
+fn run_ecsm_at(addr_xr: u64, addr_xg: u64, addr_k: u64) -> Result<(), ExecutionError> { + let mut pc = 0; + let mut registers = Registers::default(); + let mut memory = Memory::default(); + write_u256_le(&mut memory, addr_xg, &gx_le()); + write_u256_le(&mut memory, addr_k, &k_le(5)); + registers.write(17, ECSM_SYSCALL_NUMBER).unwrap(); + registers.write(10, addr_xr).unwrap(); + registers.write(11, addr_xg).unwrap(); + registers.write(12, addr_k).unwrap(); + Instruction::EcallEbreak.run(&mut pc, &mut registers, &mut memory)?; + Ok(()) +} + +#[test] +fn ecsm_syscall_rejects_overlapping_xg_k() { + // xG and k are read at the same proof timestamp, so overlapping ranges + // would make the trace unprovable — the executor must reject them upfront. + for addr_k in [0x2000u64, 0x2008, 0x2018, 0x1FE8] { + let err = run_ecsm_at(0x1000, 0x2000, addr_k).unwrap_err(); + assert!( + matches!(err, ExecutionError::EcsmOperandOverlap), + "addr_k = {addr_k:#x} overlaps addr_xg and must be rejected" + ); + } + // Touching-but-disjoint ranges are fine (boundary: |diff| = 32)... + run_ecsm_at(0x1000, 0x2000, 0x2020).expect("disjoint k above xG must run"); + run_ecsm_at(0x1000, 0x2000, 0x1FE0).expect("disjoint k below xG must run"); + // ...and xR may alias xG (its accesses are offset to later timestamps). + run_ecsm_at(0x2000, 0x2000, 0x3000).expect("xR aliasing xG is allowed"); +} + +#[test] +fn ecsm_syscall_rejects_address_overflow() { + // addr_k near the lower-limb boundary so (addr mod 2^32) + 31 overflows. 
+ let mut pc = 0; + let mut registers = Registers::default(); + let mut memory = Memory::default(); + registers.write(17, ECSM_SYSCALL_NUMBER).unwrap(); + registers.write(10, 0x1000).unwrap(); + registers.write(11, 0x2000).unwrap(); + registers.write(12, 0xFFFF_FFF0).unwrap(); // (mod 2^32) + 31 ≥ 2^32 + let err = Instruction::EcallEbreak + .run(&mut pc, &mut registers, &mut memory) + .unwrap_err(); + assert!(matches!(err, ExecutionError::EcsmAddressOverflow)); +} diff --git a/executor/src/tests/mod.rs b/executor/src/tests/mod.rs index 448a05dee..456607433 100644 --- a/executor/src/tests/mod.rs +++ b/executor/src/tests/mod.rs @@ -1,3 +1,4 @@ +pub mod ecsm_tests; pub mod flamegraph_tests; pub mod keccak_tests; pub mod memory_tests; diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs index d9b0e1c8d..0922a878e 100644 --- a/executor/src/vm/instruction/execution.rs +++ b/executor/src/vm/instruction/execution.rs @@ -1,7 +1,7 @@ use crate::vm::{ instruction::decoding::{ArithOp, Comparison, Instruction, LoadStoreWidth}, logs::Log, - memory::Memory, + memory::{Memory, MemoryError}, registers::Registers, }; @@ -14,6 +14,8 @@ pub enum SyscallNumbers { Panic = 2, Commit = 64, Halt = 93, + // Placeholder discriminant. The actual syscall value is ECSM_SYSCALL_NUMBER. + Ecsm = 94, } /// Syscall number for KeccakPermute (u64::MAX - 1 = 0xFFFF_FFFF_FFFF_FFFE). @@ -22,6 +24,17 @@ pub enum SyscallNumbers { pub const KECCAK_SYSCALL_NUMBER: u64 = u64::MAX - 1; const KECCAK_STATE_BYTES: u64 = 25 * 8; +/// Syscall number for the ECSM (elliptic-curve scalar multiply) accelerator. +/// +/// The spec uses ECALL number `-3`; interpreted as an unsigned 64-bit value that is +/// `u64::MAX - 2 = 0xFFFF_FFFF_FFFF_FFFD`, which the ECSM core table puts on the `Ecall` +/// bus as `[lo32, hi32] = [2^32 - 3, 2^32 - 1]`. +pub const ECSM_SYSCALL_NUMBER: u64 = u64::MAX - 2; + +/// `2^32`. 
The lower 32-bit limb of an address must not overflow when the small per-access +/// offsets are added (ECSM spec address-alignment assumptions). +const LOW_LIMB: u64 = 1 << 32; + impl TryFrom for SyscallNumbers { type Error = (); fn try_from(value: u64) -> Result { @@ -31,11 +44,37 @@ impl TryFrom for SyscallNumbers { 64 => Ok(SyscallNumbers::Commit), 93 => Ok(SyscallNumbers::Halt), v if v == KECCAK_SYSCALL_NUMBER => Ok(SyscallNumbers::KeccakPermute), + v if v == ECSM_SYSCALL_NUMBER => Ok(SyscallNumbers::Ecsm), _ => Err(()), } } } +/// Reads a 256-bit little-endian value as four doublewords at `addr + 8i`. +fn load_u256_le(memory: &Memory, addr: u64) -> Result<[u8; 32], MemoryError> { + let mut out = [0u8; 32]; + for i in 0..4 { + let dw = memory.load_doubleword(addr + (i as u64) * 8)?; + out[i * 8..i * 8 + 8].copy_from_slice(&dw.to_le_bytes()); + } + Ok(out) +} + +/// Writes a 256-bit little-endian value as four doublewords at `addr + 8i`. +fn store_u256_le(memory: &mut Memory, addr: u64, bytes: &[u8; 32]) -> Result<(), MemoryError> { + for i in 0..4 { + let mut dw = [0u8; 8]; + dw.copy_from_slice(&bytes[i * 8..i * 8 + 8]); + memory.store_doubleword(addr + (i as u64) * 8, u64::from_le_bytes(dw))?; + } + Ok(()) +} + +/// Checks the ECSM address-alignment assumption: `(addr mod 2^32) + max_offset < 2^32`. +fn ecsm_addr_ok(addr: u64, max_offset: u64) -> bool { + (addr % LOW_LIMB) + max_offset < LOW_LIMB +} + impl Instruction { /// Runs the given instruction and returns its execution log pub fn run( @@ -359,6 +398,36 @@ impl Instruction { } src2_val = state_addr; } + SyscallNumbers::Ecsm => { + // ECSM(-3): k×G on secp256k1. + // x10 = addr to write xR, x11 = addr of xG, x12 = addr of k. + // xG, k, xR are 32-byte little-endian values. 
+ let addr_xr = registers.read(10)?; + let addr_xg = registers.read(11)?; + let addr_k = registers.read(12)?; + if !ecsm_addr_ok(addr_xg, 24) + || !ecsm_addr_ok(addr_xr, 24) + || !ecsm_addr_ok(addr_k, 31) + { + return Err(ExecutionError::EcsmAddressOverflow); + } + // xG and k are both read at the same proof timestamp, so their + // 32-byte ranges must be disjoint or the trace is unprovable + // (MEMW orders accesses per address by strictly increasing + // timestamp). xR may alias either: its accesses are offset to + // later timestamps. + if addr_xg.abs_diff(addr_k) < 32 { + return Err(ExecutionError::EcsmOperandOverlap); + } + let xg = load_u256_le(memory, addr_xg)?; + let k = load_u256_le(memory, addr_k)?; + let xr = ecsm::scalar_mul_x(&k, &xg)?; + store_u256_le(memory, addr_xr, &xr)?; + // Carry the input addresses for the prover; addr_xR = x10 is recovered + // from the register state. + src2_val = addr_xg; + dst_val = addr_k; + } SyscallNumbers::Halt => { // halt return Ok(Log { @@ -535,6 +604,12 @@ pub enum ExecutionError { UnalignedKeccakStateAddress(u64), #[error("Keccak state address range overflows: {0:#018x}")] KeccakStateAddressOverflow(u64), + #[error("ECSM address range overflows the lower 32-bit limb")] + EcsmAddressOverflow, + #[error("ECSM xG and k operand ranges overlap")] + EcsmOperandOverlap, + #[error("ECSM scalar multiplication error: {0}")] + Ecsm(#[from] ecsm::EcsmError), } // ============================================================================= diff --git a/prover/Cargo.toml b/prover/Cargo.toml index 90c723732..da9ceb9af 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -18,6 +18,7 @@ stark = { path = "../crypto/stark" } crypto = { path = "../crypto/crypto" } math = { path = "../crypto/math" } executor = { path = "../executor" } +ecsm = { path = "../crypto/ecsm" } serde = { version = "1.0", features = ["derive"] } rayon = { version = "1.8.0", optional = true } sysinfo = { version = "0.31", default-features = false, features 
= ["system"] } diff --git a/prover/src/lib.rs b/prover/src/lib.rs index e11c539b5..81233d39f 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -49,11 +49,11 @@ use crate::tables::trace_builder::count_table_lengths; use crate::tables::types::BusId; use crate::test_utils::{ E, F, VmAir, create_bitwise_air, create_branch_air, create_bytewise_air, create_commit_air, - create_cpu_air, create_cpu32_air, create_decode_air, create_dvrm_air, create_eq_air, - create_halt_air, create_keccak_air, create_keccak_rc_air, create_keccak_rnd_air, - create_load_air, create_lt_air, create_memw_air, create_memw_aligned_air, - create_memw_register_air, create_mul_air, create_page_air, create_register_air, - create_shift_air, create_store_air, + create_cpu_air, create_cpu32_air, create_decode_air, create_dvrm_air, create_ec_scalar_air, + create_ecdas_air, create_ecsm_air, create_eq_air, create_halt_air, create_keccak_air, + create_keccak_rc_air, create_keccak_rnd_air, create_load_air, create_lt_air, create_memw_air, + create_memw_aligned_air, create_memw_register_air, create_mul_air, create_page_air, + create_register_air, create_shift_air, create_store_air, }; use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions}; @@ -71,6 +71,11 @@ pub struct RuntimePageRange { pub count: u64, } +/// Number of tables that always contribute exactly one sub-proof, regardless +/// of `TableCounts`: bitwise, decode, halt, commit, keccak, keccak_rnd, +/// keccak_rc, register, ecsm, ec_scalar, ecdas. +pub const FIXED_TABLE_COUNT: usize = 11; + /// Number of chunks for each split table. /// The verifier needs this to reconstruct matching AIRs. 
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -223,6 +228,9 @@ pub(crate) struct VmAirs { pub keccak: VmAir, pub keccak_rnd: VmAir, pub keccak_rc: VmAir, + pub ecsm: VmAir, + pub ec_scalar: VmAir, + pub ecdas: VmAir, pub register: VmAir, pub pages: Vec, pub memw_registers: Vec, @@ -244,6 +252,9 @@ impl VmAirs { (&self.keccak, &mut traces.keccak, &()), (&self.keccak_rnd, &mut traces.keccak_rnd, &()), (&self.keccak_rc, &mut traces.keccak_rc, &()), + (&self.ecsm, &mut traces.ecsm, &()), + (&self.ec_scalar, &mut traces.ec_scalar, &()), + (&self.ecdas, &mut traces.ecdas, &()), (&self.register, &mut traces.register, &()), ]; @@ -314,6 +325,9 @@ impl VmAirs { &self.keccak, &self.keccak_rnd, &self.keccak_rc, + &self.ecsm, + &self.ec_scalar, + &self.ecdas, &self.register, ]; @@ -454,6 +468,9 @@ impl VmAirs { tables::keccak_rc::preprocessed_commitment(proof_options), tables::keccak_rc::NUM_PRECOMPUTED_COLS, ); + let ecsm = create_ecsm_air(proof_options); + let ec_scalar = create_ec_scalar_air(proof_options); + let ecdas = create_ecdas_air(proof_options); let register = create_register_air(proof_options).with_preprocessed( register::preprocessed_commitment(proof_options, elf.entry_point), register::NUM_PREPROCESSED_COLS, @@ -530,6 +547,9 @@ impl VmAirs { keccak, keccak_rnd, keccak_rc, + ecsm, + ec_scalar, + ecdas, register, pages, memw_registers, @@ -890,11 +910,12 @@ pub fn verify_with_options( ); // Cross-check: table_counts must match the number of sub-proofs. - // Fixed tables (bitwise, decode, halt, commit, keccak, keccak_rnd, keccak_rc, register) = 8, plus page tables. - let expected_proof_count = vm_proof.table_counts.total() + 8 + page_configs.len(); + // FIXED_TABLE_COUNT always-present tables, plus page tables. 
+ let expected_proof_count = + vm_proof.table_counts.total() + FIXED_TABLE_COUNT + page_configs.len(); if expected_proof_count != vm_proof.proof.proofs.len() { return Err(Error::InvalidTableCounts(format!( - "table_counts total ({}) + 8 fixed + {} pages = {}, but proof contains {} sub-proofs", + "table_counts total ({}) + {FIXED_TABLE_COUNT} fixed + {} pages = {}, but proof contains {} sub-proofs", vm_proof.table_counts.total(), page_configs.len(), expected_proof_count, diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs index ea5fc94dc..450595ec9 100644 --- a/prover/src/tables/cpu.rs +++ b/prover/src/tables/cpu.rs @@ -185,6 +185,9 @@ pub struct CpuOperation { pub ecall_keccak: bool, /// For KeccakPermute ECALLs: state address from x10. pub keccak_state_addr: u64, + + /// Whether this ECALL is an ECSM (elliptic-curve scalar multiply) syscall + pub ecall_ecsm: bool, } impl CpuOperation { @@ -228,6 +231,10 @@ impl CpuOperation { let ecall_keccak = f.ecall && log.src1_val == executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER; let keccak_state_addr = if ecall_keccak { log.src2_val } else { 0 }; + // The ECSM operand addresses (x10/x11/x12) are recovered from the register state + // in the trace builder. + let ecall_ecsm = + f.ecall && log.src1_val == executor::vm::instruction::execution::ECSM_SYSCALL_NUMBER; // Word instructions are fully handled by CPU32; the main CPU row is a // delegate that only advances the PC and sends the CPU32 lookup. We still @@ -345,6 +352,7 @@ impl CpuOperation { commit_count, ecall_keccak, keccak_state_addr, + ecall_ecsm, } } diff --git a/prover/src/tables/ec_scalar.rs b/prover/src/tables/ec_scalar.rs new file mode 100644 index 000000000..66c574116 --- /dev/null +++ b/prover/src/tables/ec_scalar.rs @@ -0,0 +1,374 @@ +//! EC_SCALAR chip — serves the scalar `k` bit-by-bit to the ECDAS chip. +//! +//! One row per scalar byte (32 rows per ECSM ecall, `offset` counting down 31→0). Each row +//! 
receives a `ServeK[timestamp, ptr, offset]` token, reads byte `k[offset]` from memory, +//! decomposes it into 8 bits, and sends one `Bit[timestamp, 8*offset + i]` token per set bit +//! (the multiplicity is the bit itself). Unless `last_limb` (offset 0) it recurses by sending +//! `ServeK[timestamp, ptr, offset-1]` — a self-referential bus, like COMMIT's `CommitNextByte`. +//! +//! ## Columns (15 total) +//! - `timestamp`: DWordWL (2) — the ECALL timestamp +//! - `ptr`: DWordWL (2) — address of `k` (= `addr_k`) +//! - `offset`: Byte (1) — index of the scalar byte served by this row +//! - `limb_bits`: Bit[8] (8) — bit decomposition of `k[offset]` +//! - `last_limb`: Bit (1) — whether `offset == 0` (terminates the recursion) +//! - `mu`: Bit (1) — multiplicity (1 for real rows, 0 for padding) +//! +//! `limb = Σ 2^i · limb_bits[i]` is virtual (a linear combination, never stored). + +use math::field::element::FieldElement; +use math::field::traits::{IsField, IsSubFieldOf}; +use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::table::TableView; +use stark::trace::TraceTable; + +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::constraints::templates::new_is_bit_constraints; + +// ========================================================================= +// Column indices +// ========================================================================= + +pub mod cols { + pub const TIMESTAMP_0: usize = 0; + pub const TIMESTAMP_1: usize = 1; + pub const PTR_0: usize = 2; + pub const PTR_1: usize = 3; + pub const OFFSET: usize = 4; + /// limb_bits[0..8] + pub const LIMB_BITS: usize = 5; + pub const LAST_LIMB: usize = 13; + pub const MU: usize = 14; + + pub const NUM_COLUMNS: usize = 15; + + #[inline] + pub const fn limb_bit(i: usize) -> usize { + LIMB_BITS + i + } +} + +// 
========================================================================= +// Operation struct +// ========================================================================= + +/// One EC_SCALAR row: serving byte `offset` of the scalar at `ptr`. +#[derive(Debug, Clone)] +pub struct EcScalarOperation { + pub timestamp: u64, + pub ptr: u64, + pub offset: u8, + pub limb: u8, + pub last_limb: bool, +} + +/// Expands a scalar `k` (little-endian bytes) and its ECALL timestamp / address into the +/// 32 EC_SCALAR rows (offsets 31 down to 0). +pub fn rows_for_scalar(timestamp: u64, addr_k: u64, k: &[u8; 32]) -> Vec { + (0..32) + .rev() + .map(|offset| EcScalarOperation { + timestamp, + ptr: addr_k, + offset: offset as u8, + limb: k[offset], + last_limb: offset == 0, + }) + .collect() +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +pub fn generate_ec_scalar_trace( + ops: &[EcScalarOperation], +) -> TraceTable { + let n = ops.len(); + let num_rows = n.next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for (row_idx, op) in ops.iter().enumerate() { + let base = row_idx * cols::NUM_COLUMNS; + data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF); + data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32); + data[base + cols::PTR_0] = FE::from(op.ptr & 0xFFFF_FFFF); + data[base + cols::PTR_1] = FE::from(op.ptr >> 32); + data[base + cols::OFFSET] = FE::from(op.offset as u64); + for i in 0..8 { + data[base + cols::limb_bit(i)] = FE::from(((op.limb >> i) & 1) as u64); + } + data[base + cols::LAST_LIMB] = FE::from(op.last_limb as u64); + data[base + cols::MU] = FE::one(); + } + + // Padding rows keep every field 0: all IS_BIT constraints hold (0 is a bit) and the + // implication constraints (a·b = 0) hold trivially. 
+ TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +/// `limb = Σ 2^i · limb_bits[i]` as a single bus element (used as the byte value in MEMW). +fn limb_value() -> BusValue { + BusValue::linear( + (0..8) + .map(|i| LinearTerm::Column { + coefficient: 1i64 << i, + column: cols::limb_bit(i), + }) + .collect(), + ) +} + +pub fn bus_interactions() -> Vec { + let ts = || { + [ + BusValue::Packed { + start_column: cols::TIMESTAMP_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }, + ] + }; + let ptr = || { + [ + BusValue::Packed { + start_column: cols::PTR_0, + packing: Packing::Direct, + }, + BusValue::Packed { + start_column: cols::PTR_1, + packing: Packing::Direct, + }, + ] + }; + + let mut interactions = Vec::with_capacity(11); + + // 1. Receive ServeK[timestamp, ptr, offset] (mult = mu). + { + let [t0, t1] = ts(); + let [p0, p1] = ptr(); + interactions.push(BusInteraction::receiver( + BusId::ServeK, + Multiplicity::Column(cols::MU), + vec![ + t0, + t1, + p0, + p1, + BusValue::Packed { + start_column: cols::OFFSET, + packing: Packing::Direct, + }, + ], + )); + } + + // 2. MEMW: read byte k[offset] at ptr+offset, timestamp+1, width 1 (mult = mu). + // CO24 layout: [old[8], is_register, base[2], value[8], ts[2], w2, w4, w8]. 
+ { + let base_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::PTR_0, + }, + LinearTerm::Column { + coefficient: 1, + column: cols::OFFSET, + }, + ]); + let base_hi = BusValue::Packed { + start_column: cols::PTR_1, + packing: Packing::Direct, + }; + let ts_lo_plus_1 = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::TIMESTAMP_0, + }, + LinearTerm::Constant(1), + ]); + let ts_hi = BusValue::Packed { + start_column: cols::TIMESTAMP_1, + packing: Packing::Direct, + }; + let mut values = Vec::with_capacity(24); + // old[0..8]: read value = limb, rest 0 + values.push(limb_value()); + for _ in 1..8 { + values.push(BusValue::constant(0)); + } + values.push(BusValue::constant(0)); // is_register = 0 + values.push(base_lo); + values.push(base_hi); + // value[0..8]: same as old (read) + values.push(limb_value()); + for _ in 1..8 { + values.push(BusValue::constant(0)); + } + values.push(ts_lo_plus_1); + values.push(ts_hi); + values.push(BusValue::constant(0)); // w2 + values.push(BusValue::constant(0)); // w4 + values.push(BusValue::constant(0)); // w8 (width 1 byte) + interactions.push(BusInteraction::sender( + BusId::Memw, + Multiplicity::Column(cols::MU), + values, + )); + } + + // 3. Send Bit[timestamp, 8*offset + i] for each set bit (mult = limb_bits[i]). + for i in 0..8 { + let [t0, t1] = ts(); + interactions.push(BusInteraction::sender( + BusId::Bit, + Multiplicity::Column(cols::limb_bit(i)), + vec![ + t0, + t1, + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 8, + column: cols::OFFSET, + }, + LinearTerm::Constant(i as i64), + ]), + ], + )); + } + + // 4. Recurse: send ServeK[timestamp, ptr, offset-1] (mult = mu - last_limb). 
+ { + let [t0, t1] = ts(); + let [p0, p1] = ptr(); + interactions.push(BusInteraction::sender( + BusId::ServeK, + Multiplicity::Diff(cols::MU, cols::LAST_LIMB), + vec![ + t0, + t1, + p0, + p1, + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::OFFSET, + }, + LinearTerm::Constant(-1), + ]), + ], + )); + } + + interactions +} + +// ========================================================================= +// Constraints +// ========================================================================= + +/// `a · b = 0` or `a · (1 - b) = 0` (degree 2), used for the spec's implication +/// constraints (`limb_bits_i = 1 ⇒ μ = 1`, `last_limb ⇒ μ`, `last_limb ⇒ offset = 0`). +pub struct MulZeroConstraint { + pub a: usize, + pub b: usize, + /// when true, the second factor is `(1 - b)` instead of `b` + pub b_complement: bool, + pub constraint_idx: usize, +} + +impl TransitionConstraint for MulZeroConstraint { + fn degree(&self) -> usize { + 2 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let a = step.get_main_evaluation_element(0, self.a).clone(); + let b = step.get_main_evaluation_element(0, self.b).clone(); + if self.b_complement { + a * (FieldElement::::one() - b) + } else { + a * b + } + } +} + +/// Creates all EC_SCALAR transition constraints (20 total). +pub fn create_constraints( + constraint_idx_start: usize, +) -> ( + Vec>>, + usize, +) { + let mut constraints: Vec< + Box>, + > = Vec::with_capacity(20); + let mut idx = constraint_idx_start; + + // IS_BIT for mu, limb_bits[0..8], last_limb. 
+ let mut bit_cols = vec![cols::MU]; + bit_cols.extend((0..8).map(cols::limb_bit)); + bit_cols.push(cols::LAST_LIMB); + let (bit_constraints, next) = new_is_bit_constraints(&bit_cols, idx); + for c in bit_constraints { + constraints.push(c.boxed()); + } + idx = next; + + // limb_bits[i] = 1 ⇒ mu = 1 : limb_bits[i] · (1 - mu) = 0 + for i in 0..8 { + constraints.push( + MulZeroConstraint { + a: cols::limb_bit(i), + b: cols::MU, + b_complement: true, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + + // last_limb = 1 ⇒ mu = 1 : last_limb · (1 - mu) = 0 + constraints.push( + MulZeroConstraint { + a: cols::LAST_LIMB, + b: cols::MU, + b_complement: true, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + // last_limb = 1 ⇒ offset = 0 : last_limb · offset = 0 + constraints.push( + MulZeroConstraint { + a: cols::LAST_LIMB, + b: cols::OFFSET, + b_complement: false, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + (constraints, idx) +} diff --git a/prover/src/tables/ecdas.rs b/prover/src/tables/ecdas.rs new file mode 100644 index 000000000..48183b810 --- /dev/null +++ b/prover/src/tables/ecdas.rs @@ -0,0 +1,518 @@ +//! ECDAS chip — one double/add step of the scalar-multiplication sequence. +//! +//! Each row receives an accumulator `(A, G, round, op)` on the self-referential `Ecdas` +//! bus, computes `R = 2A` (op=0) or `R = A + G` (op=1) via three byte-limb convolution +//! relations (`λ`, `xR`, `yR`, each with a 33-byte quotient + 64-entry carry array and the +//! offset `r = 3p`), and sends the updated accumulator back with `round − (1 − next_op)` +//! and `next_op`. When `next_op = 1` it consumes the scalar bit at `round` on the `Bit` +//! bus (an add follows). ECSM seeds and drains the bus; interior rows telescope. +//! +//! See `spec/ecdas.toml`. Constraints are **unconditional**; padding rows set the quotients +//! to `r` and `op = 0`, which makes every relation hold with zero carries. 
+ +use math::field::element::FieldElement; +use math::field::traits::{IsField, IsSubFieldOf}; +use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::table::TableView; +use stark::trace::TraceTable; + +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::constraints::templates::IsBitConstraint; +use crate::tables::ecsm::ecdas_tuple; +use ecsm::{EcdasStep, P_BYTES}; + +/// `r = 3·p` as 33 little-endian bytes (the spec offset that keeps all quotients positive). +pub const R_BYTES: [u8; 33] = [ + 0x8D, 0xF4, 0xFF, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x02, +]; + +// ========================================================================= +// Column indices (~521 columns) +// ========================================================================= + +pub mod cols { + pub const TIMESTAMP_0: usize = 0; + pub const TIMESTAMP_1: usize = 1; + pub const XG: usize = 2; // U256BL (32) + pub const YG: usize = 34; + pub const XA: usize = 66; + pub const YA: usize = 98; + pub const ROUND: usize = 130; // Byte + pub const OP: usize = 131; // Bit + pub const XR: usize = 132; // U256BL (32) + pub const YR: usize = 164; + pub const LAMBDA: usize = 196; // U256BL (32) + pub const Q0: usize = 228; // Byte[33] + pub const C0: usize = 261; // BaseField[64] + pub const Q1: usize = 325; // Byte[33] + pub const C1: usize = 358; // BaseField[64] + pub const Q2: usize = 422; // Byte[33] + pub const C2: usize = 455; // BaseField[64] + pub const NEXT_OP: usize = 519; // Bit + pub const MU: usize = 520; + + pub const NUM_COLUMNS: usize = 521; + + #[inline] + pub const fn c0(i: usize) -> usize { + C0 + i + } + #[inline] + pub const fn c1(i: usize) -> usize { + C1 + i + } + #[inline] + pub const fn 
c2(i: usize) -> usize { + C2 + i + } +} + +// ========================================================================= +// Operation struct +// ========================================================================= + +/// One ECDAS row: a double/add step witness plus its ECALL timestamp. +#[derive(Debug, Clone)] +pub struct EcdasOperation { + pub timestamp: u64, + pub step: EcdasStep, +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +fn fe_from_i64(c: i64) -> FE { + if c >= 0 { + FE::from(c as u64) + } else { + FE::zero() - FE::from(c.unsigned_abs()) // |c| as u64; total, unlike `-c` at i64::MIN + } +} + +fn write_bytes(data: &mut [FE], base: usize, col: usize, bytes: &[u8]) { + for (i, &b) in bytes.iter().enumerate() { + data[base + col + i] = FE::from(b as u64); + } +} + +pub fn generate_ecdas_trace( + ops: &[EcdasOperation], +) -> TraceTable { + let n = ops.len(); + let num_rows = n.next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for (row_idx, op) in ops.iter().enumerate() { + let base = row_idx * cols::NUM_COLUMNS; + let s = &op.step; + + data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF); + data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32); + write_bytes(&mut data, base, cols::XG, &s.x_g); + write_bytes(&mut data, base, cols::YG, &s.y_g); + write_bytes(&mut data, base, cols::XA, &s.x_a); + write_bytes(&mut data, base, cols::YA, &s.y_a); + data[base + cols::ROUND] = FE::from(s.round as u64); + data[base + cols::OP] = FE::from(s.op as u64); + write_bytes(&mut data, base, cols::XR, &s.x_r); + write_bytes(&mut data, base, cols::YR, &s.y_r); + write_bytes(&mut data, base, cols::LAMBDA, &s.lambda); + write_bytes(&mut data, base, cols::Q0, &s.q0); + write_bytes(&mut data, base, cols::Q1, &s.q1); + write_bytes(&mut data, base, cols::Q2, &s.q2); + for i in 0..64 { + data[base + cols::c0(i)] =
fe_from_i64(s.c0[i]); + data[base + cols::c1(i)] = fe_from_i64(s.c1[i]); + data[base + cols::c2(i)] = fe_from_i64(s.c2[i]); + } + data[base + cols::NEXT_OP] = FE::from(s.next_op as u64); + data[base + cols::MU] = FE::one(); + } + + // Padding rows: q0 = q1 = q2 = r, op = 0, everything else 0. This makes every + // (unconditional) convolution relation hold with zero carries. + for row_idx in n..num_rows { + let base = row_idx * cols::NUM_COLUMNS; + write_bytes(&mut data, base, cols::Q0, &R_BYTES); + write_bytes(&mut data, base, cols::Q1, &R_BYTES); + write_bytes(&mut data, base, cols::Q2, &R_BYTES); + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +fn packed(col: usize) -> BusValue { + BusValue::Packed { + start_column: col, + packing: Packing::Direct, + } +} + +pub fn bus_interactions() -> Vec { + let mu = || Multiplicity::Column(cols::MU); + let ts_lo = || packed(cols::TIMESTAMP_0); + let ts_hi = || packed(cols::TIMESTAMP_1); + let mut out = Vec::new(); + + // Receive [ts, xA, yA, xG, yG, round, op]. + out.push(BusInteraction::receiver( + BusId::Ecdas, + mu(), + ecdas_tuple( + cols::XA, + cols::YA, + cols::XG, + cols::YG, + packed(cols::ROUND), + packed(cols::OP), + ts_lo(), + ts_hi(), + ), + )); + + // IS_BYTE range checks (single byte → AreBytes[x, 0]). 
+ let is_byte = |col: usize, len: usize, out: &mut Vec| { + for i in 0..len { + out.push(BusInteraction::sender( + BusId::AreBytes, + Multiplicity::Column(cols::MU), + vec![packed(col + i), BusValue::constant(0)], + )); + } + }; + is_byte(cols::ROUND, 1, &mut out); + is_byte(cols::LAMBDA, 32, &mut out); + is_byte(cols::Q0, 33, &mut out); + is_byte(cols::XR, 32, &mut out); + is_byte(cols::Q1, 33, &mut out); + is_byte(cols::YR, 32, &mut out); + is_byte(cols::Q2, 33, &mut out); + + // IS_HALF range checks on the carries (offsets keep them in [0, 2^16)). + let half = |col: usize, off: i64| { + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: col, + }, + LinearTerm::Constant(off), + ]) + }; + for (base, off) in [(cols::C0, 32636i64), (cols::C1, 8161), (cols::C2, 16320)] { + for i in 0..63 { + out.push(BusInteraction::sender( + BusId::IsHalfword, + mu(), + vec![half(base + i, off)], + )); + } + } + + // Receive Bit[ts, round] when adding next (mult = next_op). + out.push(BusInteraction::receiver( + BusId::Bit, + Multiplicity::Column(cols::NEXT_OP), + vec![ts_lo(), ts_hi(), packed(cols::ROUND)], + )); + + // Send the updated accumulator: [ts, xR, yR, xG, yG, round - 1 + next_op, next_op]. 
+ out.push(BusInteraction::sender( + BusId::Ecdas, + mu(), + ecdas_tuple( + cols::XR, + cols::YR, + cols::XG, + cols::YG, + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::ROUND, + }, + LinearTerm::Column { + coefficient: 1, + column: cols::NEXT_OP, + }, + LinearTerm::Constant(-1), + ]), + packed(cols::NEXT_OP), + ts_lo(), + ts_hi(), + ), + )); + + out +} + +// ========================================================================= +// Constraints +// ========================================================================= + +fn p_byte(m: usize) -> FieldElement { + if m < 32 { + FieldElement::from(P_BYTES[m] as u64) + } else { + FieldElement::zero() + } +} + +fn r_byte(m: usize) -> FieldElement { + if m < 33 { + FieldElement::from(R_BYTES[m] as u64) + } else { + FieldElement::zero() + } +} + +#[derive(Clone, Copy)] +pub enum Relation { + Lambda, + Xr, + Yr, +} + +/// Unconditional convolution carry constraint at limb `i`: `2^8·c_i − c_{i-1} − S_i = 0`. +pub struct ConvCarry { + pub relation: Relation, + pub i: usize, + pub constraint_idx: usize, +} + +impl ConvCarry { + fn s_i(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let i = self.i; + let col = |c: usize| -> FieldElement { step.get_main_evaluation_element(0, c).clone() }; + // bytes (zero beyond the stored length) + let b = |base: usize, len: usize, j: usize| -> FieldElement { + if j < len { + col(base + j) + } else { + FieldElement::zero() + } + }; + let lam = |j: usize| b(cols::LAMBDA, 32, j); + let xg = |j: usize| b(cols::XG, 32, j); + let xa = |j: usize| b(cols::XA, 32, j); + let ya = |j: usize| b(cols::YA, 32, j); + let yg = |j: usize| b(cols::YG, 32, j); + let xr = |j: usize| b(cols::XR, 32, j); + let yr = |j: usize| b(cols::YR, 32, j); + let op = col(cols::OP); + let one = FieldElement::::one(); + + // r·P − q·P convolution (shared structure across all three relations). 
+ let rq = |qbase: usize| -> FieldElement { + let mut s = FieldElement::::zero(); + for j in 0..=i { + s += (r_byte::(j) - b(qbase, 33, j)) * p_byte::(i - j); + } + s + }; + + match self.relation { + Relation::Lambda => { + // op·(Σ λ_j(xG-xA)_{i-j} + (yA_i - yG_i)) + let mut op_branch = ya(i) - yg(i); + for j in 0..=i { + op_branch += lam(j) * (xg(i - j) - xa(i - j)); + } + // (1-op)·Σ (2 λ_j yA_{i-j} - 3 xA_j xA_{i-j}) + let mut notop_branch = FieldElement::::zero(); + for j in 0..=i { + notop_branch = notop_branch + + FieldElement::::from(2u64) * lam(j) * ya(i - j) + - FieldElement::::from(3u64) * xa(j) * xa(i - j); + } + op.clone() * op_branch + (one - op) * notop_branch + rq(cols::Q0) + } + Relation::Xr => { + // Σ λ_j λ_{i-j} − xA_i − xG_i − xR_i − (1-op)(xA_i − xG_i) + rq + let mut s = FieldElement::::zero(); + for j in 0..=i { + s += lam(j) * lam(i - j); + } + s - xa(i) - xg(i) - xr(i) - (one - op) * (xa(i) - xg(i)) + rq(cols::Q1) + } + Relation::Yr => { + // Σ λ_j(xA-xR)_{i-j} − yA_i − yR_i + rq + let mut s = FieldElement::::zero(); + for j in 0..=i { + s += lam(j) * (xa(i - j) - xr(i - j)); + } + s - ya(i) - yr(i) + rq(cols::Q2) + } + } + } +} + +impl TransitionConstraint for ConvCarry { + fn degree(&self) -> usize { + match self.relation { + Relation::Lambda => 3, // op · (λ · Δx) + Relation::Xr | Relation::Yr => 2, + } + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let c_base = match self.relation { + Relation::Lambda => cols::C0, + Relation::Xr => cols::C1, + Relation::Yr => cols::C2, + }; + let c_i = step.get_main_evaluation_element(0, c_base + self.i).clone(); + let c_prev = if self.i == 0 { + FieldElement::::zero() + } else { + step.get_main_evaluation_element(0, c_base + self.i - 1) + .clone() + }; + FieldElement::::from(256u64) * c_i - c_prev - self.s_i(step) + } +} + +/// `col = 0` (unconditional, degree 1). 
Used for the closing `c_63 = 0`. +pub struct ColIsZero { + pub col: usize, + pub constraint_idx: usize, +} + +impl TransitionConstraint for ColIsZero { + fn degree(&self) -> usize { + 1 + } + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + step.get_main_evaluation_element(0, self.col).clone() + } +} + +/// `a · b = 0` or `a · (1 - b) = 0` (degree 2). +pub struct MulZero { + pub a: usize, + pub b: usize, + pub b_complement: bool, + pub constraint_idx: usize, +} + +impl TransitionConstraint for MulZero { + fn degree(&self) -> usize { + 2 + } + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let a = step.get_main_evaluation_element(0, self.a).clone(); + let b = step.get_main_evaluation_element(0, self.b).clone(); + if self.b_complement { + a * (FieldElement::::one() - b) + } else { + a * b + } + } +} + +/// Creates all ECDAS transition constraints (199 total). +pub fn create_constraints( + constraint_idx_start: usize, +) -> ( + Vec>>, + usize, +) { + let mut constraints: Vec< + Box>, + > = Vec::new(); + let mut idx = constraint_idx_start; + + // `op` needs no direct bit check: it is only ever the op field of an Ecdas bus token, and + // every producer of that token emits a bit there — ECSM seeds it with a constant 0, and + // each ECDAS step emits `next_op` (which is IS_BIT'd). The bus cannot be minted (IS_BIT(mu) + // blocks weight ≠ 1), so a row's received `op` is always in {0,1} and the λ/xR/yR selector + // `op·add + (1−op)·double` is well-defined. 
+ for col in [cols::MU, cols::NEXT_OP] { + constraints.push(IsBitConstraint::unconditional(col, idx).boxed()); + idx += 1; + } + + // op · next_op = 0 + constraints.push( + MulZero { + a: cols::OP, + b: cols::NEXT_OP, + b_complement: false, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + // next_op · (1 - mu) = 0 + constraints.push( + MulZero { + a: cols::NEXT_OP, + b: cols::MU, + b_complement: true, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + // λ, xR, yR convolution carries + closings. + for (relation, c_base) in [ + (Relation::Lambda, cols::C0), + (Relation::Xr, cols::C1), + (Relation::Yr, cols::C2), + ] { + for i in 0..64 { + constraints.push( + ConvCarry { + relation, + i, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + constraints.push( + ColIsZero { + col: c_base + 63, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + + (constraints, idx) +} diff --git a/prover/src/tables/ecsm.rs b/prover/src/tables/ecsm.rs new file mode 100644 index 000000000..2b4444656 --- /dev/null +++ b/prover/src/tables/ecsm.rs @@ -0,0 +1,939 @@ +//! ECSM core chip — orchestrates one secp256k1 scalar multiplication `k·G`. +//! +//! One row per `ECALL(-3)`. It reads `xG` and `k` from memory, witnesses `yG` and proves +//! `yG² ≡ xG³ + b mod p` (via two byte-limb convolution relations with quotients `q0,q1` +//! and 64-entry carry arrays `c0,c1`), enforces `0 < k < N` and `xR < p`, writes `xR` back, +//! and delegates the double-and-add to ECDAS / EC_SCALAR over the `Ecdas`/`ServeK`/`Bit` +//! buses. +//! +//! See `spec/ecsm.toml`. All multi-limb arithmetic uses 8-bit limbs; the witness is built +//! by `ecsm::compute_witness`, which reproduces these exact recurrences. +//! +//! ## Padding +//! Padding rows have `mu = 0`, all columns zero **except `q1`, which pads to `p`**. This makes +//! both carry relations close on padding without gating the whole recurrence: the x² relation +//! 
has no standalone constant (closes at all-zero), and the yG relation closes because the +//! `p² − q1·p` offset cancels (`q1 = p`) and the curve constant `b` is multiplied by `µ` (so it +//! drops when `µ = 0`). Only that single `µ·b` term is µ-gated. The range checks / +//! virtual-carry checks remain µ-gated as before. + +use executor::vm::instruction::execution::ECSM_SYSCALL_NUMBER; +use math::field::element::FieldElement; +use math::field::traits::{IsField, IsSubFieldOf}; +use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator}; +use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing}; +use stark::table::TableView; +use stark::trace::TraceTable; + +use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField}; +use crate::constraints::templates::{INV_SHIFT_32, IsBitConstraint}; +use ecsm::{B, EcsmWitness, N_BYTES, P_BYTES}; + +// ========================================================================= +// Column indices (~427 columns) +// ========================================================================= + +pub mod cols { + pub const TIMESTAMP_0: usize = 0; + pub const TIMESTAMP_1: usize = 1; + pub const ADDR_XG_0: usize = 2; + pub const ADDR_XG_1: usize = 3; + pub const ADDR_K_0: usize = 4; + pub const ADDR_K_1: usize = 5; + pub const ADDR_XR_0: usize = 6; + pub const ADDR_XR_1: usize = 7; + + pub const XR: usize = 8; // U256BL (32) + pub const YR: usize = 40; // U256BL (32) + pub const K: usize = 72; // U256BL (32) + pub const LEN_K: usize = 104; // Byte + pub const XG: usize = 105; // U256BL (32) + pub const YG: usize = 137; // U256BL (32) + pub const X2: usize = 169; // U256BL (32) + pub const Q0: usize = 201; // U256BL (32) + pub const C0: usize = 233; // BaseField[64] + pub const Q1: usize = 297; // Byte[33] + pub const C1: usize = 330; // BaseField[64] + pub const K_SUB_N: usize = 394; // U256HL (16 halfwords) + pub const XR_SUB_P: usize = 410; // U256HL (16 halfwords) + pub const 
MU: usize = 426; + + pub const NUM_COLUMNS: usize = 427; + + #[inline] + pub const fn xr(i: usize) -> usize { + XR + i + } + #[inline] + pub const fn k(i: usize) -> usize { + K + i + } + #[inline] + pub const fn xg(i: usize) -> usize { + XG + i + } + #[inline] + pub const fn yg(i: usize) -> usize { + YG + i + } + #[inline] + pub const fn x2(i: usize) -> usize { + X2 + i + } + #[inline] + pub const fn q0(i: usize) -> usize { + Q0 + i + } + #[inline] + pub const fn c0(i: usize) -> usize { + C0 + i + } + #[inline] + pub const fn q1(i: usize) -> usize { + Q1 + i + } + #[inline] + pub const fn c1(i: usize) -> usize { + C1 + i + } + #[inline] + pub const fn k_sub_n(i: usize) -> usize { + K_SUB_N + i + } + #[inline] + pub const fn xr_sub_p(i: usize) -> usize { + XR_SUB_P + i + } +} + +// ========================================================================= +// Operation struct +// ========================================================================= + +/// One ECSM ecall: the math witness plus the three memory addresses and timestamp. +#[derive(Debug, Clone)] +pub struct EcsmOperation { + pub timestamp: u64, + pub addr_xg: u64, + pub addr_k: u64, + pub addr_xr: u64, + pub witness: EcsmWitness, +} + +// ========================================================================= +// Trace generation +// ========================================================================= + +/// Converts a signed carry to a field element (negatives wrap to `p − |c|`). 
+fn fe_from_i64(c: i64) -> FE { + if c >= 0 { + FE::from(c as u64) + } else { + FE::zero() - FE::from((-c) as u64) + } +} + +fn write_dword_wl(data: &mut [FE], base: usize, lo_col: usize, value: u64) { + data[base + lo_col] = FE::from(value & 0xFFFF_FFFF); + data[base + lo_col + 1] = FE::from(value >> 32); +} + +fn write_bytes(data: &mut [FE], base: usize, col: usize, bytes: &[u8]) { + for (i, &b) in bytes.iter().enumerate() { + data[base + col + i] = FE::from(b as u64); + } +} + +/// Writes a 32-byte little-endian value as 16 halfwords (U256HL). +fn write_halfwords(data: &mut [FE], base: usize, col: usize, bytes: &[u8; 32]) { + for j in 0..16 { + let hw = bytes[2 * j] as u64 + ((bytes[2 * j + 1] as u64) << 8); + data[base + col + j] = FE::from(hw); + } +} + +pub fn generate_ecsm_trace( + ops: &[EcsmOperation], +) -> TraceTable { + let n = ops.len(); + let num_rows = n.next_power_of_two().max(4); + let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS]; + + for (row_idx, op) in ops.iter().enumerate() { + let base = row_idx * cols::NUM_COLUMNS; + let w = &op.witness; + + write_dword_wl(&mut data, base, cols::TIMESTAMP_0, op.timestamp); + write_dword_wl(&mut data, base, cols::ADDR_XG_0, op.addr_xg); + write_dword_wl(&mut data, base, cols::ADDR_K_0, op.addr_k); + write_dword_wl(&mut data, base, cols::ADDR_XR_0, op.addr_xr); + + write_bytes(&mut data, base, cols::XR, &w.x_r); + write_bytes(&mut data, base, cols::YR, &w.y_r); + write_bytes(&mut data, base, cols::K, &w.k); + data[base + cols::LEN_K] = FE::from(w.len_k as u64); + write_bytes(&mut data, base, cols::XG, &w.x_g); + write_bytes(&mut data, base, cols::YG, &w.y_g); + write_bytes(&mut data, base, cols::X2, &w.x2); + write_bytes(&mut data, base, cols::Q0, &w.q0); + write_bytes(&mut data, base, cols::Q1, &w.q1); + write_halfwords(&mut data, base, cols::K_SUB_N, &w.k_sub_n); + write_halfwords(&mut data, base, cols::XR_SUB_P, &w.x_r_sub_p); + + for i in 0..64 { + data[base + cols::c0(i)] = 
fe_from_i64(w.c0[i]); + data[base + cols::c1(i)] = fe_from_i64(w.c1[i]); + } + + data[base + cols::MU] = FE::one(); + } + + // Padding rows (`mu = 0`) must carry `q1 = p` so the yG carry relation closes: the + // `p² − q1·p` offset cancels and the µ-gated `b` term drops. Bytes 0..31 hold p; byte 32 + // stays 0 (a valid IS_BIT value). + for row_idx in n..num_rows { + let base = row_idx * cols::NUM_COLUMNS; + write_bytes(&mut data, base, cols::Q1, &P_BYTES); + } + + TraceTable::new_main(data, cols::NUM_COLUMNS, 1) +} + +// ========================================================================= +// Bus value helpers +// ========================================================================= + +fn packed(col: usize) -> BusValue { + BusValue::Packed { + start_column: col, + packing: Packing::Direct, + } +} + +/// `[old[8], is_register, base_lo, base_hi, value[8], ts_lo, ts_hi, w2, w4, w8]` — +/// a 24-element MEMW **read** tuple (`old == value`). +#[allow(clippy::too_many_arguments)] +fn memw_read( + value: [BusValue; 8], + is_register: u64, + base_lo: BusValue, + base_hi: BusValue, + ts_lo: BusValue, + ts_hi: BusValue, + w2: u64, + w8: u64, +) -> Vec { + let mut v = Vec::with_capacity(24); + v.extend(value.clone()); // old == value (read) + v.push(BusValue::constant(is_register)); + v.push(base_lo); + v.push(base_hi); + v.extend(value); + v.push(ts_lo); + v.push(ts_hi); + v.push(BusValue::constant(w2)); + v.push(BusValue::constant(0)); + v.push(BusValue::constant(w8)); + v +} + +/// `[is_register, base_lo, base_hi, value[8], ts_lo, ts_hi, w2, w4, w8]` — +/// a 16-element MEMW **write** tuple (MEMW table supplies `old`). 
+fn memw_write( + value: [BusValue; 8], + base_lo: BusValue, + base_hi: BusValue, + ts_lo: BusValue, + ts_hi: BusValue, + w8: u64, +) -> Vec { + let mut v = Vec::with_capacity(16); + v.push(BusValue::constant(0)); // is_register = 0 (memory) + v.push(base_lo); + v.push(base_hi); + v.extend(value); + v.push(ts_lo); + v.push(ts_hi); + v.push(BusValue::constant(0)); // w2 + v.push(BusValue::constant(0)); // w4 + v.push(BusValue::constant(w8)); + v +} + +/// The eight bytes of a 256-bit value at `col + 8*chunk` as MEMW value elements. +fn dword_bytes(col: usize, chunk: usize) -> [BusValue; 8] { + std::array::from_fn(|b| packed(col + 8 * chunk + b)) +} + +/// A register value `[lo, hi, 0, 0, 0, 0, 0, 0]` as MEMW value elements. +fn register_value(lo_col: usize, hi_col: usize) -> [BusValue; 8] { + let mut v: [BusValue; 8] = std::array::from_fn(|_| BusValue::constant(0)); + v[0] = packed(lo_col); + v[1] = packed(hi_col); + v +} + +/// The 32 bytes of a U256BL coordinate as bus elements (shared shape for the ECDAS bus, +/// used identically by ECSM and ECDAS). +pub fn point_coord_busvalues(col: usize) -> Vec { + (0..32).map(|b| packed(col + b)).collect() +} + +// ========================================================================= +// Bus interactions +// ========================================================================= + +pub fn bus_interactions() -> Vec { + let mu = || Multiplicity::Column(cols::MU); + let ts_lo = || packed(cols::TIMESTAMP_0); + let ts_hi = || packed(cols::TIMESTAMP_1); + let mut out = Vec::new(); + + // ECALL receiver (mult = mu): [ts_lo, ts_hi, syscall_lo32, syscall_hi32]. + out.push(BusInteraction::receiver( + BusId::Ecall, + mu(), + vec![ + ts_lo(), + ts_hi(), + BusValue::constant(ECSM_SYSCALL_NUMBER & 0xFFFF_FFFF), + BusValue::constant(ECSM_SYSCALL_NUMBER >> 32), + ], + )); + + // read x11 -> addr_xG (register read at ts). 
+ out.push(BusInteraction::sender( + BusId::Memw, + mu(), + memw_read( + register_value(cols::ADDR_XG_0, cols::ADDR_XG_1), + 1, + BusValue::constant(2 * 11), + BusValue::constant(0), + ts_lo(), + ts_hi(), + 1, + 0, + ), + )); + // read xG: 4 doublewords at addr_xG + 8i (ts). + for i in 0..4 { + let base_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::ADDR_XG_0, + }, + LinearTerm::Constant((8 * i) as i64), + ]); + out.push(BusInteraction::sender( + BusId::Memw, + mu(), + memw_read( + dword_bytes(cols::XG, i), + 0, + base_lo, + packed(cols::ADDR_XG_1), + ts_lo(), + ts_hi(), + 0, + 1, + ), + )); + } + + // read x12 -> addr_k (register read at ts). + out.push(BusInteraction::sender( + BusId::Memw, + mu(), + memw_read( + register_value(cols::ADDR_K_0, cols::ADDR_K_1), + 1, + BusValue::constant(2 * 12), + BusValue::constant(0), + ts_lo(), + ts_hi(), + 1, + 0, + ), + )); + // read k: 4 doublewords at addr_k + 8i (ts). + for i in 0..4 { + let base_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::ADDR_K_0, + }, + LinearTerm::Constant((8 * i) as i64), + ]); + out.push(BusInteraction::sender( + BusId::Memw, + mu(), + memw_read( + dword_bytes(cols::K, i), + 0, + base_lo, + packed(cols::ADDR_K_1), + ts_lo(), + ts_hi(), + 0, + 1, + ), + )); + } + + // read x10 -> addr_xR (register read at ts + 1). + let ts_lo_plus = |d: i64| { + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::TIMESTAMP_0, + }, + LinearTerm::Constant(d), + ]) + }; + out.push(BusInteraction::sender( + BusId::Memw, + mu(), + memw_read( + register_value(cols::ADDR_XR_0, cols::ADDR_XR_1), + 1, + BusValue::constant(2 * 10), + BusValue::constant(0), + ts_lo_plus(1), + ts_hi(), + 1, + 0, + ), + )); + // write xR: 4 doublewords at addr_xR + 8i (ts + 2). 
+ for i in 0..4 { + let base_lo = BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::ADDR_XR_0, + }, + LinearTerm::Constant((8 * i) as i64), + ]); + out.push(BusInteraction::sender( + BusId::Memw, + mu(), + memw_write( + dword_bytes(cols::XR, i), + base_lo, + packed(cols::ADDR_XR_1), + ts_lo_plus(2), + ts_hi(), + 1, + ), + )); + } + + // IS_BYTE range checks (single byte → AreBytes[x, 0]). + let is_byte = |col: usize, len: usize, out: &mut Vec| { + for i in 0..len { + out.push(BusInteraction::sender( + BusId::AreBytes, + Multiplicity::Column(cols::MU), + vec![packed(col + i), BusValue::constant(0)], + )); + } + }; + is_byte(cols::X2, 32, &mut out); + is_byte(cols::Q0, 32, &mut out); + is_byte(cols::YG, 32, &mut out); + is_byte(cols::Q1, 32, &mut out); // q1[0..31]; q1[32] is an IS_BIT constraint + + // IS_HALF range checks: c0[i]+8160, c1[i]+16319 (i=0..62), then k_sub_N / xR_sub_p. + let half_offset = |col: usize, off: i64| { + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: col, + }, + LinearTerm::Constant(off), + ]) + }; + for i in 0..63 { + out.push(BusInteraction::sender( + BusId::IsHalfword, + mu(), + vec![half_offset(cols::c0(i), 8160)], + )); + } + for i in 0..63 { + out.push(BusInteraction::sender( + BusId::IsHalfword, + mu(), + vec![half_offset(cols::c1(i), 16319)], + )); + } + for i in 0..16 { + out.push(BusInteraction::sender( + BusId::IsHalfword, + mu(), + vec![packed(cols::k_sub_n(i))], + )); + } + for i in 0..16 { + out.push(BusInteraction::sender( + BusId::IsHalfword, + mu(), + vec![packed(cols::xr_sub_p(i))], + )); + } + + // ZERO bus: assert k != 0 (sum of k's 32 bytes is nonzero). + out.push(BusInteraction::sender( + BusId::Zero, + mu(), + vec![ + BusValue::linear( + (0..32) + .map(|i| LinearTerm::Column { + coefficient: 1, + column: cols::k(i), + }) + .collect(), + ), + BusValue::constant(0), // expected ZERO output = 0 ⇒ input is nonzero + ], + )); + + // Delegation buses. 
+ // SERVE_K send: [ts, addr_k, 31]. + out.push(BusInteraction::sender( + BusId::ServeK, + mu(), + vec![ + ts_lo(), + ts_hi(), + packed(cols::ADDR_K_0), + packed(cols::ADDR_K_1), + BusValue::constant(31), + ], + )); + // BIT receiver: the MSB at position len_k. + out.push(BusInteraction::receiver( + BusId::Bit, + mu(), + vec![ts_lo(), ts_hi(), packed(cols::LEN_K)], + )); + // ECDAS start: [ts, xG, yG, xG, yG, len_k - 1, 0]. + out.push(BusInteraction::sender( + BusId::Ecdas, + mu(), + ecdas_tuple( + cols::XG, + cols::YG, + cols::XG, + cols::YG, + BusValue::linear(vec![ + LinearTerm::Column { + coefficient: 1, + column: cols::LEN_K, + }, + LinearTerm::Constant(-1), + ]), + BusValue::constant(0), + ts_lo(), + ts_hi(), + ), + )); + // ECDAS final receiver: [ts, xR, yR, xG, yG, -1, 0]. + out.push(BusInteraction::receiver( + BusId::Ecdas, + mu(), + ecdas_tuple( + cols::XR, + cols::YR, + cols::XG, + cols::YG, + BusValue::linear(vec![LinearTerm::Constant(-1)]), + BusValue::constant(0), + ts_lo(), + ts_hi(), + ), + )); + + out +} + +/// Builds the ECDAS bus tuple `[ts_lo, ts_hi, accX(32), accY(32), genX(32), genY(32), +/// round, op]`. Shared so the ECSM sender and the ECDAS receiver/sender pack it identically. +#[allow(clippy::too_many_arguments)] +pub fn ecdas_tuple( + acc_x: usize, + acc_y: usize, + gen_x: usize, + gen_y: usize, + round: BusValue, + op: BusValue, + ts_lo: BusValue, + ts_hi: BusValue, +) -> Vec { + let mut v = Vec::with_capacity(2 + 4 * 32 + 2); + v.push(ts_lo); + v.push(ts_hi); + v.extend(point_coord_busvalues(acc_x)); + v.extend(point_coord_busvalues(acc_y)); + v.extend(point_coord_busvalues(gen_x)); + v.extend(point_coord_busvalues(gen_y)); + v.push(round); + v.push(op); + v +} + +// ========================================================================= +// Constraints +// ========================================================================= + +/// Which convolution relation a carry constraint enforces. 
+#[derive(Clone, Copy)] +pub enum Relation { + /// `xG² − x2 − q0·p = 0` + X2, + /// `yG² + p² − xG·x2 − b − q1·p = 0` + Yg, +} + +fn p_byte(m: usize) -> FieldElement { + if m < 32 { + FieldElement::from(P_BYTES[m] as u64) + } else { + FieldElement::zero() + } +} + +/// Convolution carry constraint at limb `i`: `2^8·c_i − c_{i-1} − S_i = 0`, with `c_{-1} = 0`. +/// Unconditional (degree 2); the only µ-gated term is the curve constant `µ·b` inside `S_i` +/// for the yG relation at limb 0 (see [`ConvCarry::s_i`]). +pub struct ConvCarry { + pub relation: Relation, + pub i: usize, + pub constraint_idx: usize, +} + +impl ConvCarry { + fn s_i(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let i = self.i; + let col = |c: usize| -> FieldElement { step.get_main_evaluation_element(0, c).clone() }; + let byte = |base: usize, len: usize, j: usize| -> FieldElement { + if j < len { + col(base + j) + } else { + FieldElement::zero() + } + }; + let mut s = FieldElement::::zero(); + match self.relation { + Relation::X2 => { + // Σ xG_j·xG_{i-j} − x2_i − Σ q0_j·P_{i-j} + for j in 0..=i { + s += byte(cols::XG, 32, j) * byte(cols::XG, 32, i - j); + s = s - byte(cols::Q0, 32, j) * p_byte::(i - j); + } + s = s - byte(cols::X2, 32, i); + } + Relation::Yg => { + // Σ (yG_j·yG_{i-j} + P_j·P_{i-j} − x2_j·xG_{i-j} − q1_j·P_{i-j}) − b_i + for j in 0..=i { + s += byte(cols::YG, 32, j) * byte(cols::YG, 32, i - j); + s += p_byte::(j) * p_byte::(i - j); + s = s - byte(cols::X2, 32, j) * byte(cols::XG, 32, i - j); + s = s - byte(cols::Q1, 33, j) * p_byte::(i - j); + } + if i == 0 { + // Only the curve constant `b` is gated by `µ`: it vanishes on padding + // (µ=0) and equals `b` on real rows (µ=1). `B` is the zero-extension of + // `b`, so `B_i = 0` for i ≥ 1 — nothing to gate there. The rest of the + // relation stays unconditional. 
+ let mu = step.get_main_evaluation_element(0, cols::MU).clone(); + s = s - mu * FieldElement::::from(B); + } + } + } + s + } +} + +impl TransitionConstraint for ConvCarry { + fn degree(&self) -> usize { + 2 // degree-2 convolution; the only µ-gated term (µ·b) is degree 1 + } + + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let c_base = match self.relation { + Relation::X2 => cols::C0, + Relation::Yg => cols::C1, + }; + let c_i = step.get_main_evaluation_element(0, c_base + self.i).clone(); + let c_prev = if self.i == 0 { + FieldElement::::zero() + } else { + step.get_main_evaluation_element(0, c_base + self.i - 1) + .clone() + }; + FieldElement::::from(256u64) * c_i - c_prev - self.s_i(step) + } +} + +/// `col = 0` (unconditional, degree 1). Used for the closing `c_63 = 0`. +pub struct ColIsZero { + pub col: usize, + pub constraint_idx: usize, +} + +impl TransitionConstraint for ColIsZero { + fn degree(&self) -> usize { + 1 + } + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + step.get_main_evaluation_element(0, self.col).clone() + } +} + +/// The two 256-bit addition-overflow checks (`k < N` and `xR < p`), whose 8 word-carries +/// `c` are virtual. Each `c_i = 2^-32·(addend0_i + addend1_i + c_{i-1} − sum_i)`. The addition +/// must overflow `2^256` (carry-out `c_7 = 1`), which proves the strict inequality: +/// `k < N` is `N + k_sub_N = k + 2^256` (with `k_sub_N = k − N mod 2^256`); `xR < p` is +/// `p + xR_sub_p = xR + 2^256` (with `xR_sub_p = xR − p mod 2^256`). 
+#[derive(Clone, Copy)] +pub enum OverflowKind { + KLtN, + XrLtP, +} + +impl OverflowKind { + /// The constant addend's 32-bit word `i` (`N` for `k u64 { + let bytes = match self { + OverflowKind::KLtN => &N_BYTES, + OverflowKind::XrLtP => &P_BYTES, + }; + let mut w = 0u64; + for b in 0..4 { + w += (bytes[4 * i + b] as u64) << (8 * b); + } + w + } + /// Column base of the witnessed halfword addend (`k_sub_N` / `xR_sub_p`). + fn addend_hl_base(self) -> usize { + match self { + OverflowKind::KLtN => cols::K_SUB_N, + OverflowKind::XrLtP => cols::XR_SUB_P, + } + } + /// Column base of the byte sum (`k` / `xR`). + fn sum_bl_base(self) -> usize { + match self { + OverflowKind::KLtN => cols::K, + OverflowKind::XrLtP => cols::XR, + } + } +} + +/// Computes the 8 word-carries of the addition for `kind`. +fn carry_chain(kind: OverflowKind, step: &TableView) -> [FieldElement; 8] +where + F: IsSubFieldOf, + E: IsField, +{ + let inv = FieldElement::::from(INV_SHIFT_32); + let hl = kind.addend_hl_base(); + let bl = kind.sum_bl_base(); + let mut c: [FieldElement; 8] = std::array::from_fn(|_| FieldElement::zero()); + let mut prev = FieldElement::::zero(); + for (i, slot) in c.iter_mut().enumerate() { + // addend1 word i (from halfwords): hl[2i] + 2^16·hl[2i+1] + let addend1 = step.get_main_evaluation_element(0, hl + 2 * i).clone() + + step.get_main_evaluation_element(0, hl + 2 * i + 1).clone() + * FieldElement::::from(1u64 << 16); + // sum word i (from bytes): Σ bl[4i+b]·2^{8b} + let mut sum = FieldElement::::zero(); + for b in 0..4 { + sum += step.get_main_evaluation_element(0, bl + 4 * i + b).clone() + * FieldElement::::from(1u64 << (8 * b)); + } + let addend0 = FieldElement::::from(kind.const_word(i)); + let ci = (addend0 + addend1 + prev.clone() - sum) * inv.clone(); + *slot = ci.clone(); + prev = ci; + } + c +} + +/// `µ · c_i · (1 - c_i) = 0` for a virtual carry bit (degree 3, since `c_i` is linear). 
+pub struct CarryBit { + pub kind: OverflowKind, + pub i: usize, + pub constraint_idx: usize, +} + +impl TransitionConstraint for CarryBit { + fn degree(&self) -> usize { + 3 + } + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let c = carry_chain(self.kind, step); + let mu = step.get_main_evaluation_element(0, cols::MU).clone(); + let one = FieldElement::::one(); + mu * c[self.i].clone() * (one - c[self.i].clone()) + } +} + +/// `µ · (1 - c_7) = 0`: the top carry must be 1 (the addition overflows). +pub struct OverflowRequired { + pub kind: OverflowKind, + pub constraint_idx: usize, +} + +impl TransitionConstraint for OverflowRequired { + fn degree(&self) -> usize { + 2 + } + fn constraint_idx(&self) -> usize { + self.constraint_idx + } + fn evaluate(&self, step: &TableView) -> FieldElement + where + F: IsSubFieldOf, + E: IsField, + { + let c = carry_chain(self.kind, step); + let mu = step.get_main_evaluation_element(0, cols::MU).clone(); + mu * (FieldElement::::one() - c[7].clone()) + } +} + +/// Creates all ECSM transition constraints (148 total). +pub fn create_constraints( + constraint_idx_start: usize, +) -> ( + Vec>>, + usize, +) { + let mut constraints: Vec< + Box>, + > = Vec::new(); + let mut idx = constraint_idx_start; + + // IS_BIT(mu) + constraints.push(IsBitConstraint::unconditional(cols::MU, idx).boxed()); + idx += 1; + + // x2 convolution: 64 carries + closing. + for i in 0..64 { + constraints.push( + ConvCarry { + relation: Relation::X2, + i, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + constraints.push( + ColIsZero { + col: cols::c0(63), + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + // yG convolution: 64 carries + closing. 
+ for i in 0..64 { + constraints.push( + ConvCarry { + relation: Relation::Yg, + i, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + constraints.push( + ColIsZero { + col: cols::c1(63), + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + // IS_BIT(q1[32]) + constraints.push(IsBitConstraint::unconditional(cols::q1(32), idx).boxed()); + idx += 1; + + // k < N: 7 carry bits + overflow-required. + for i in 0..7 { + constraints.push( + CarryBit { + kind: OverflowKind::KLtN, + i, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + constraints.push( + OverflowRequired { + kind: OverflowKind::KLtN, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + // xR < p: 7 carry bits + overflow-required. + for i in 0..7 { + constraints.push( + CarryBit { + kind: OverflowKind::XrLtP, + i, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + } + constraints.push( + OverflowRequired { + kind: OverflowKind::XrLtP, + constraint_idx: idx, + } + .boxed(), + ); + idx += 1; + + (constraints, idx) +} diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs index 4401307a9..50bc399af 100644 --- a/prover/src/tables/mod.rs +++ b/prover/src/tables/mod.rs @@ -29,6 +29,9 @@ pub mod cpu; pub mod cpu32; pub mod decode; pub mod dvrm; +pub mod ec_scalar; +pub mod ecdas; +pub mod ecsm; pub mod eq; pub mod halt; pub mod keccak; diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs index e9fa9b7d3..7947fa088 100644 --- a/prover/src/tables/trace_builder.rs +++ b/prover/src/tables/trace_builder.rs @@ -45,6 +45,9 @@ use super::cpu::{self, CpuOperation}; use super::cpu32; use super::decode; use super::dvrm::{self, DvrmOperation}; +use super::ec_scalar; +use super::ecdas; +use super::ecsm; use super::eq; use super::halt; use super::keccak::{self, KeccakOperation}; @@ -350,7 +353,8 @@ fn collect_cpu_ops( /// /// MEMW and LOAD collection requires sequential processing with state tracking. 
/// -/// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops) +/// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops, +/// cpu32_ops, ecsm_ops, ec_scalar_ops, ecdas_ops) #[allow(clippy::type_complexity)] fn collect_ops_from_cpu( cpu_ops: &[CpuOperation], @@ -365,6 +369,9 @@ fn collect_ops_from_cpu( Vec, Vec, Vec, + Vec, + Vec, + Vec, ) { let mut memw_ops = Vec::with_capacity(cpu_ops.len() * 3); let mut load_ops = Vec::with_capacity(cpu_ops.len() / 8 + 1); @@ -374,6 +381,9 @@ fn collect_ops_from_cpu( let mut commit_ops = Vec::new(); let mut keccak_ops = Vec::new(); let mut cpu32_ops = Vec::new(); + let mut ecsm_ops = Vec::new(); + let mut ec_scalar_ops = Vec::new(); + let mut ecdas_ops = Vec::new(); let mut current_commit_index = 0u32; let mut commit_ecall_count = 0u32; @@ -455,6 +465,16 @@ fn collect_ops_from_cpu( }); } + // Collect ECSM ecall operations (memory I/O + the three table row sets) + if op.ecall_ecsm { + let (ecsm_memw, ecsm_op, ec_scalar_rows, ecdas_rows) = + collect_ecsm_ops(op, memory_state, register_state); + memw_ops.extend(ecsm_memw); + ecsm_ops.push(ecsm_op); + ec_scalar_ops.extend(ec_scalar_rows); + ecdas_ops.extend(ecdas_rows); + } + // --- ALU chip dispatch (no state tracking) --- // Word (`*W`) instructions are delegated to CPU32 (which itself drives // the ALU chips); the main CPU does not send the ALU bus for them, so we @@ -505,6 +525,9 @@ fn collect_ops_from_cpu( commit_ops, keccak_ops, cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, ) } @@ -612,6 +635,128 @@ fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState) memw_op } +/// Collects all MEMW ops and the ECSM / EC_SCALAR / ECDAS table ops for one ECSM ecall. 
+/// +/// Timestamp scheme (within the instruction's 4-wide budget): the `x11`/`x12` register reads +/// and the `xG`/`k` memory reads happen at `T`; the `x10` register read and the EC_SCALAR +/// byte reads at `T + 1`; the `xR` memory writes at `T + 2`. Every read advances +/// `memory_state` / `register_state` (the offline read-old + write-new model), so later +/// accesses always observe a strictly smaller old timestamp. +#[allow(clippy::needless_range_loop)] +fn collect_ecsm_ops( + op: &CpuOperation, + memory_state: &mut MemoryState, + register_state: &mut RegisterState, +) -> ( + Vec, + ecsm::EcsmOperation, + Vec, + Vec, +) { + let t = op.timestamp; + let addr_xr = register_state.read(10).0; + let addr_xg = register_state.read(11).0; + let addr_k = register_state.read(12).0; + + // Read the xG and k operands (32 little-endian bytes each) from memory. + let mut xg = [0u8; 32]; + let mut k = [0u8; 32]; + for i in 0..32 { + xg[i] = memory_state.read_byte(addr_xg.wrapping_add(i as u64)).0; + k[i] = memory_state.read_byte(addr_k.wrapping_add(i as u64)).0; + } + + let witness = ::ecsm::compute_witness(&k, &xg) + .expect("ECSM witness: executor validates 0 < k < N and xG on curve"); + + let mut memw_ops = Vec::with_capacity(47); + + // x11 -> addr_xG, x12 -> addr_k (register reads at T). + for reg in [11u8, 12u8] { + let (val, old_ts) = register_state.read(reg); + let value = pack_register_value(val); + memw_ops.push( + MemwOperation::new(true, 2 * reg as u64, value, t, 2, true) + .with_old(value, [old_ts, old_ts, 0, 0, 0, 0, 0, 0]), + ); + register_state.write(reg, val, t); + } + + // xG and k: 4 doubleword reads each at T. 
+ for (base, bytes) in [(addr_xg, &witness.x_g), (addr_k, &witness.k)] { + for i in 0..4 { + let addr = base.wrapping_add((8 * i) as u64); + let mut value = [0u64; 8]; + let mut dword = 0u64; + for j in 0..8 { + value[j] = bytes[8 * i + j] as u64; + dword |= (bytes[8 * i + j] as u64) << (8 * j); + } + let (_old, old_ts) = memory_state.read_bytes(addr, 8); + memw_ops + .push(MemwOperation::new(false, addr, value, t, 8, true).with_old(value, old_ts)); + memory_state.write_bytes(addr, dword, 8, t); + } + } + + // x10 -> addr_xR (register read at T + 1). + { + let (val, old_ts) = register_state.read(10); + let value = pack_register_value(val); + memw_ops.push( + MemwOperation::new(true, 2 * 10, value, t + 1, 2, true) + .with_old(value, [old_ts, old_ts, 0, 0, 0, 0, 0, 0]), + ); + register_state.write(10, val, t + 1); + } + + // EC_SCALAR byte reads of k at T + 1 (one per scalar byte). + for offset in 0..32u64 { + let addr = addr_k.wrapping_add(offset); + let byte = k[offset as usize]; + let value = [byte as u64, 0, 0, 0, 0, 0, 0, 0]; + let (_v, old_ts) = memory_state.read_byte(addr); + memw_ops.push( + MemwOperation::new(false, addr, value, t + 1, 1, true) + .with_old(value, [old_ts, 0, 0, 0, 0, 0, 0, 0]), + ); + memory_state.write_byte(addr, byte, t + 1); + } + + // xR writes at T + 2 (4 doublewords). 
+ for i in 0..4 { + let addr = addr_xr.wrapping_add((8 * i) as u64); + let mut value = [0u64; 8]; + let mut dword = 0u64; + for j in 0..8 { + value[j] = witness.x_r[8 * i + j] as u64; + dword |= (witness.x_r[8 * i + j] as u64) << (8 * j); + } + let (old_vals, old_ts) = memory_state.read_bytes(addr, 8); + memw_ops.push( + MemwOperation::new(false, addr, value, t + 2, 8, false).with_old(old_vals, old_ts), + ); + memory_state.write_bytes(addr, dword, 8, t + 2); + } + + let ec_scalar_ops = ec_scalar::rows_for_scalar(t, addr_k, &witness.k); + let ecdas_ops = witness + .steps + .iter() + .cloned() + .map(|step| ecdas::EcdasOperation { timestamp: t, step }) + .collect(); + let ecsm_op = ecsm::EcsmOperation { + timestamp: t, + addr_xg, + addr_k, + addr_xr, + witness, + }; + + (memw_ops, ecsm_op, ec_scalar_ops, ecdas_ops) +} + /// Collects register read/write operations (M1, M3, M5) from CpuOperation. /// /// Returns: Vec of MEMW operations for register accesses @@ -1857,6 +2002,81 @@ fn collect_bitwise_from_commit(commit_ops: &[CommitOperation]) -> Vec BitwiseOperation { + BitwiseOperation::halfword( + BitwiseOperationType::IsHalf, + (v & 0xFF) as u8, + (v >> 8) as u8, + ) +} + +/// IS_BYTE lookup for a single byte (sent as `AreBytes[byte, 0]`). +fn is_byte_op(b: u8) -> BitwiseOperation { + BitwiseOperation::byte_op(BitwiseOperationType::AreBytes, b, 0) +} + +/// BITWISE lookups sent by the ECSM core table (range checks + the `k != 0` ZERO check), +/// so the BITWISE receiver multiplicities account for them. +#[allow(clippy::needless_range_loop)] +pub(crate) fn collect_bitwise_from_ecsm(ops: &[ecsm::EcsmOperation]) -> Vec { + let mut out = Vec::new(); + for op in ops { + let w = &op.witness; + // IS_BYTE on x2, q0, yG, q1[0..31]. + for i in 0..32 { + out.push(is_byte_op(w.x2[i])); + out.push(is_byte_op(w.q0[i])); + out.push(is_byte_op(w.y_g[i])); + out.push(is_byte_op(w.q1[i])); + } + // IS_HALF on the carries c0[i]+8160, c1[i]+16319 (i = 0..62). 
+ for i in 0..63 { + out.push(is_half_op((w.c0[i] + 8160) as u16)); + out.push(is_half_op((w.c1[i] + 16319) as u16)); + } + // IS_HALF on the U256HL limbs of k_sub_N and xR_sub_p. + for i in 0..16 { + out.push(is_half_op( + w.k_sub_n[2 * i] as u16 + ((w.k_sub_n[2 * i + 1] as u16) << 8), + )); + out.push(is_half_op( + w.x_r_sub_p[2 * i] as u16 + ((w.x_r_sub_p[2 * i + 1] as u16) << 8), + )); + } + // ZERO: assert k != 0 (sum of k's bytes). + let sum: u32 = w.k.iter().map(|&b| b as u32).sum(); + out.push(BitwiseOperation::zero(sum)); + } + out +} + +/// BITWISE lookups sent by every ECDAS row (range checks on the byte limbs + carries). +#[allow(clippy::needless_range_loop)] +pub(crate) fn collect_bitwise_from_ecdas(ops: &[ecdas::EcdasOperation]) -> Vec { + let mut out = Vec::new(); + for op in ops { + let s = &op.step; + out.push(is_byte_op(s.round)); + for i in 0..32 { + out.push(is_byte_op(s.lambda[i])); + out.push(is_byte_op(s.x_r[i])); + out.push(is_byte_op(s.y_r[i])); + } + for i in 0..33 { + out.push(is_byte_op(s.q0[i])); + out.push(is_byte_op(s.q1[i])); + out.push(is_byte_op(s.q2[i])); + } + for i in 0..63 { + out.push(is_half_op((s.c0[i] + 32636) as u16)); + out.push(is_half_op((s.c1[i] + 8161) as u16)); + out.push(is_half_op((s.c2[i] + 16320) as u16)); + } + } + out +} + /// Collect BITWISE lookups generated by the keccak chips. 
/// /// The keccak round chip sends BYTE_ALU, HWSL, and ARE_BYTES @@ -2238,6 +2458,15 @@ pub struct Traces { /// KECCAK_RC precomputed round constant table (32 rows) pub keccak_rc: TraceTable, + /// ECSM core table (one row per scalar-multiplication ecall) + pub ecsm: TraceTable, + + /// EC_SCALAR table (32 rows per ecall) + pub ec_scalar: TraceTable, + + /// ECDAS double/add table (variable rows per ecall) + pub ecdas: TraceTable, + /// MEMW_R register-only fast-path traces (split into chunks of max_rows::MEMW_R) pub memw_registers: Vec>, // Auxiliary ALU / memory / CPU32 dispatch chips (split into chunks of their max_rows) @@ -2268,6 +2497,10 @@ struct CollectedOps { bytewise_ops: Vec, store_ops: Vec, cpu32_ops: Vec, + // EC scalar-multiplication accelerator chips. + ecsm_ops: Vec, + ec_scalar_ops: Vec, + ecdas_ops: Vec, } /// Chunk raw ops and generate one trace table per chunk. When `storage_mode` @@ -2314,6 +2547,9 @@ fn collect_all_ops( commit_ops: Vec, keccak_ops: Vec, cpu32_ops: Vec, + ecsm_ops: Vec, + ec_scalar_ops: Vec, + ecdas_ops: Vec, register_state: &mut RegisterState, ) -> CollectedOps { // HALT finalization: 33 register MEMW operations at timestamp u64::MAX. @@ -2445,6 +2681,9 @@ fn collect_all_ops( bytewise_ops, store_ops, cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, } } @@ -2483,6 +2722,9 @@ fn build_traces( bytewise_ops, store_ops, cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, } = ops; // ===================================================================== @@ -2526,6 +2768,8 @@ fn build_traces( bitwise_ops.extend(collect_bitwise_from_commit(&commit_ops)); // KECCAK_RND sends XOR/AND/ARE_BYTES/HWSL; KECCAK core sends IS_HALF bitwise_ops.extend(collect_bitwise_from_keccak(&keccak_ops)); + bitwise_ops.extend(collect_bitwise_from_ecsm(&ecsm_ops)); + bitwise_ops.extend(collect_bitwise_from_ecdas(&ecdas_ops)); // CPU padding rows send ARE_BYTES with all-zero values. // Add corresponding ops so the bitwise table multiplicities balance. 
@@ -2693,6 +2937,11 @@ fn build_traces( let mut keccak_rc_trace = keccak_rc::generate_keccak_rc_trace(); keccak_rc::update_multiplicities(&mut keccak_rc_trace, keccak_ops.len()); + // ECSM accelerator traces (empty/all-padding for programs that do not use ECSM). + let ecsm_trace = ecsm::generate_ecsm_trace(&ecsm_ops); + let ec_scalar_trace = ec_scalar::generate_ec_scalar_trace(&ec_scalar_ops); + let ecdas_trace = ecdas::generate_ecdas_trace(&ecdas_ops); + #[allow(unused_mut)] let (mut pages, page_configs, mut register_trace, mut halt_trace); #[cfg(feature = "parallel")] @@ -2784,6 +3033,9 @@ fn build_traces( keccak: keccak_trace, keccak_rnd: keccak_rnd_trace, keccak_rc: keccak_rc_trace, + ecsm: ecsm_trace, + ec_scalar: ec_scalar_trace, + ecdas: ecdas_trace, memw_registers, eqs, bytewises, @@ -3037,6 +3289,9 @@ impl Traces { use super::decode::NUM_PRECOMPUTED_COLS as DECODE_PRECOMPUTED; use super::decode::cols::NUM_COLUMNS as DECODE_COLS; use super::dvrm::cols::NUM_COLUMNS as DVRM_COLS; + use super::ec_scalar::cols::NUM_COLUMNS as EC_SCALAR_COLS; + use super::ecdas::cols::NUM_COLUMNS as ECDAS_COLS; + use super::ecsm::cols::NUM_COLUMNS as ECSM_COLS; use super::eq::cols::NUM_COLUMNS as EQ_COLS; use super::halt::cols::NUM_COLUMNS as HALT_COLS; use super::keccak::cols::NUM_COLUMNS as KECCAK_COLS; @@ -3075,6 +3330,9 @@ impl Traces { keccak, keccak_rnd, keccak_rc, + ecsm, + ec_scalar, + ecdas, memw_registers, eqs, bytewises, @@ -3138,6 +3396,9 @@ impl Traces { for t in cpu32s { total += (t.num_rows() * CPU32_COLS) as u64; } + total += (ecsm.num_rows() * ECSM_COLS) as u64; + total += (ec_scalar.num_rows() * EC_SCALAR_COLS) as u64; + total += (ecdas.num_rows() * ECDAS_COLS) as u64; total } @@ -3177,6 +3438,9 @@ impl Traces { let n_bytewise = aux_cols(super::bytewise::bus_interactions().len()); let n_store = aux_cols(super::store::bus_interactions().len()); let n_cpu32 = aux_cols(super::cpu32::bus_interactions().len()); + let n_ecsm = 
aux_cols(super::ecsm::bus_interactions().len()); + let n_ec_scalar = aux_cols(super::ec_scalar::bus_interactions().len()); + let n_ecdas = aux_cols(super::ecdas::bus_interactions().len()); let Traces { cpus, @@ -3197,6 +3461,9 @@ impl Traces { keccak, keccak_rnd, keccak_rc, + ecsm, + ec_scalar, + ecdas, memw_registers, eqs, bytewises, @@ -3260,6 +3527,9 @@ impl Traces { for t in cpu32s { total += (t.num_rows() * n_cpu32) as u64; } + total += (ecsm.num_rows() * n_ecsm) as u64; + total += (ec_scalar.num_rows() * n_ec_scalar) as u64; + total += (ecdas.num_rows() * n_ecdas) as u64; total } @@ -3418,8 +3688,19 @@ impl Traces { let mut memory_state = MemoryState::from_elf(elf); memory_state.add_private_input(private_input); let mut register_state = RegisterState::new(elf.entry_point); - let (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops, cpu32_ops) = - collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + let ( + memw_ops, + load_ops, + lt_ops, + shift_ops, + bitwise_ops, + commit_ops, + keccak_ops, + cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, + ) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); let ops = collect_all_ops( cpu_ops, @@ -3431,6 +3712,9 @@ impl Traces { commit_ops, keccak_ops, cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, &mut register_state, ); @@ -3468,8 +3752,19 @@ impl Traces { let mut memory_state = MemoryState::new(); let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc); let mut register_state = RegisterState::new(entry_point); - let (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops, cpu32_ops) = - collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); + let ( + memw_ops, + load_ops, + lt_ops, + shift_ops, + bitwise_ops, + commit_ops, + keccak_ops, + cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, + ) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state); let ops = collect_all_ops( 
cpu_ops, @@ -3481,6 +3776,9 @@ impl Traces { commit_ops, keccak_ops, cpu32_ops, + ecsm_ops, + ec_scalar_ops, + ecdas_ops, &mut register_state, ); diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs index 195b1e005..bc16ce780 100644 --- a/prover/src/tables/types.rs +++ b/prover/src/tables/types.rs @@ -128,6 +128,18 @@ pub enum BusId { /// CPU → CPU32 delegation of word (`*W`) instructions: /// `CPU32[timestamp, pc, instruction_length]`. Cpu32 = 27, + + // ========================================================================= + // EC scalar multiplication accelerator (ECSM / ECDAS / EC_SCALAR) + // ========================================================================= + /// ECDAS self-referential double/add sequence bus: + /// (timestamp, xA, yA, xG, yG, round, op). ECSM seeds and drains it. + Ecdas = 28, + /// EC_SCALAR self-referential scalar-byte server bus: (timestamp, ptr, offset). + ServeK = 29, + /// Scalar-bit bus: EC_SCALAR sends one per set bit (timestamp, bit_index); + /// ECDAS receives one per add, ECSM receives the MSB. 
+ Bit = 30, } impl BusId { @@ -154,6 +166,9 @@ impl BusId { BusId::Alu => "Alu", BusId::MemoryOp => "MemoryOp", BusId::Cpu32 => "Cpu32", + BusId::Ecdas => "Ecdas", + BusId::ServeK => "ServeK", + BusId::Bit => "Bit", } } } @@ -183,6 +198,9 @@ impl TryFrom for BusId { 25 => Ok(BusId::Alu), 26 => Ok(BusId::MemoryOp), 27 => Ok(BusId::Cpu32), + 28 => Ok(BusId::Ecdas), + 29 => Ok(BusId::ServeK), + 30 => Ok(BusId::Bit), other => Err(other), } } diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs index 31434f5ab..fd9d9d40c 100644 --- a/prover/src/test_utils.rs +++ b/prover/src/test_utils.rs @@ -58,6 +58,11 @@ use crate::tables::decode::{bus_interactions as decode_bus_interactions, cols as use crate::tables::dvrm::{ bus_interactions as dvrm_bus_interactions, cols as dvrm_cols, dvrm_constraints, }; +use crate::tables::ec_scalar::{ + bus_interactions as ec_scalar_bus_interactions, cols as ec_scalar_cols, +}; +use crate::tables::ecdas::{bus_interactions as ecdas_bus_interactions, cols as ecdas_cols}; +use crate::tables::ecsm::{bus_interactions as ecsm_bus_interactions, cols as ecsm_cols}; use crate::tables::eq::{bus_interactions as eq_bus_interactions, cols as eq_cols, eq_constraints}; use crate::tables::halt::{bus_interactions as halt_bus_interactions, cols as halt_cols}; use crate::tables::keccak::{bus_interactions as keccak_bus_interactions, cols as keccak_cols}; @@ -1040,3 +1045,51 @@ pub fn create_keccak_rc_air(proof_options: &ProofOptions) -> VmAir { ) .with_name("KECCAK_RC") } + +/// Create ECSM core AIR (secp256k1 scalar-multiplication orchestrator). 
+pub fn create_ecsm_air(proof_options: &ProofOptions) -> VmAir { + let (transition_constraints, _) = crate::tables::ecsm::create_constraints(0); + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: ecsm_bus_interactions(), + }; + AirWithBuses::new( + ecsm_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("ECSM") +} + +/// Create EC_SCALAR AIR (serves the scalar bit-by-bit to ECDAS). +pub fn create_ec_scalar_air(proof_options: &ProofOptions) -> VmAir { + let (transition_constraints, _) = crate::tables::ec_scalar::create_constraints(0); + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: ec_scalar_bus_interactions(), + }; + AirWithBuses::new( + ec_scalar_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("EC_SCALAR") +} + +/// Create ECDAS AIR (per-step double/add of the scalar-multiplication sequence). +pub fn create_ecdas_air(proof_options: &ProofOptions) -> VmAir { + let (transition_constraints, _) = crate::tables::ecdas::create_constraints(0); + let auxiliary_trace_build_data = AuxiliaryTraceBuildData { + interactions: ecdas_bus_interactions(), + }; + AirWithBuses::new( + ecdas_cols::NUM_COLUMNS, + auxiliary_trace_build_data, + proof_options, + 1, + transition_constraints, + ) + .with_name("ECDAS") +} diff --git a/prover/src/tests/ec_scalar_tests.rs b/prover/src/tests/ec_scalar_tests.rs new file mode 100644 index 000000000..462443843 --- /dev/null +++ b/prover/src/tests/ec_scalar_tests.rs @@ -0,0 +1,91 @@ +//! Tests for the EC_SCALAR table — constraint satisfaction on generated traces, +//! the `last_limb` schedule, and the constraint count. 
+ +use crate::constraints::templates::IsBitConstraint; +use crate::tables::ec_scalar::{ + MulZeroConstraint, cols, create_constraints, generate_ec_scalar_trace, rows_for_scalar, +}; +use crate::tables::types::{FE, GoldilocksExtension, GoldilocksField}; +use stark::constraints::transition::TransitionConstraint; +use stark::table::TableView; +use stark::trace::TraceTable; + +/// Builds a one-row `TableView` for `row` of the trace (constraints only read row 0). +fn row_view( + trace: &TraceTable, + row: usize, +) -> TableView { + let main: Vec = (0..cols::NUM_COLUMNS) + .map(|c| *trace.main_table.get(row, c)) + .collect(); + TableView::new(vec![main], vec![]) +} + +#[test] +fn constraints_hold_on_generated_trace() { + let mut k = [0u8; 32]; + // a scalar with assorted bit patterns across several bytes + k[0] = 0b1010_0101; + k[1] = 0xFF; + k[15] = 0x80; + k[31] = 0x01; + let ops = rows_for_scalar(444, 0x3000, &k); + let trace = generate_ec_scalar_trace(&ops); + + // IS_BIT columns + let mut bit_cols = vec![cols::MU]; + bit_cols.extend((0..8).map(cols::limb_bit)); + bit_cols.push(cols::LAST_LIMB); + + for row in 0..trace.num_rows() { + let view = row_view(&trace, row); + for &col in &bit_cols { + let v = IsBitConstraint::unconditional(col, 0).evaluate(&view); + assert_eq!(v, FE::zero(), "IS_BIT col {col} row {row}"); + } + // implication constraints + for i in 0..8 { + let c = MulZeroConstraint { + a: cols::limb_bit(i), + b: cols::MU, + b_complement: true, + constraint_idx: 0, + }; + assert_eq!(c.evaluate(&view), FE::zero(), "limb_bit{i}=>mu row {row}"); + } + let c = MulZeroConstraint { + a: cols::LAST_LIMB, + b: cols::MU, + b_complement: true, + constraint_idx: 0, + }; + assert_eq!(c.evaluate(&view), FE::zero(), "last_limb=>mu row {row}"); + let c = MulZeroConstraint { + a: cols::LAST_LIMB, + b: cols::OFFSET, + b_complement: false, + constraint_idx: 0, + }; + assert_eq!(c.evaluate(&view), FE::zero(), "last_limb=>offset row {row}"); + } +} + +#[test] +fn 
last_limb_set_only_at_offset_zero() { + let k = [7u8; 32]; + let ops = rows_for_scalar(4, 0x100, &k); + assert_eq!(ops.len(), 32); + for op in &ops { + assert_eq!(op.last_limb, op.offset == 0); + } + // 32 distinct offsets 31..0 + assert_eq!(ops[0].offset, 31); + assert_eq!(ops[31].offset, 0); +} + +#[test] +fn create_constraints_count() { + let (constraints, next) = create_constraints(0); + assert_eq!(constraints.len(), 20); + assert_eq!(next, 20); +} diff --git a/prover/src/tests/ecdas_tests.rs b/prover/src/tests/ecdas_tests.rs new file mode 100644 index 000000000..4e6a95bee --- /dev/null +++ b/prover/src/tests/ecdas_tests.rs @@ -0,0 +1,133 @@ +//! Tests for the ECDAS double/add table — the `R_BYTES` offset constant, constraint +//! satisfaction on generated traces across many scalars, and the constraint count. + +use crate::constraints::templates::IsBitConstraint; +use crate::tables::ecdas::{ + ColIsZero, ConvCarry, EcdasOperation, MulZero, R_BYTES, Relation, cols, create_constraints, + generate_ecdas_trace, +}; +use crate::tables::types::{FE, GoldilocksExtension, GoldilocksField}; +use ecsm::compute_witness; +use stark::constraints::transition::TransitionConstraint; +use stark::table::TableView; +use stark::trace::TraceTable; + +fn gx_le() -> [u8; 32] { + let mut be = [ + 0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + be.reverse(); + be +} + +fn k_le(v: u64) -> [u8; 32] { + let mut k = [0u8; 32]; + k[..8].copy_from_slice(&v.to_le_bytes()); + k +} + +fn ops_for(k: u64) -> Vec { + let w = compute_witness(&k_le(k), &gx_le()).unwrap(); + w.steps + .into_iter() + .map(|step| EcdasOperation { + timestamp: 444, + step, + }) + .collect() +} + +fn row_view( + trace: &TraceTable, + row: usize, +) -> TableView { + let main: Vec = (0..cols::NUM_COLUMNS) + .map(|c| *trace.main_table.get(row, c)) + .collect(); + 
TableView::new(vec![main], vec![]) +} + +#[test] +fn r_bytes_is_three_p() { + // 3·p as 33 little-endian bytes, cross-checked against the ecsm field modulus. + let p = ecsm::p(); + let three_p = &p * 3u32; + let mut bytes = three_p.to_bytes_le(); + bytes.resize(33, 0); + assert_eq!(&bytes[..], &R_BYTES[..]); +} + +/// Every ECDAS constraint evaluates to zero on a generated trace across many scalars +/// (which exercise both double and add steps), including padding rows. +#[test] +fn constraints_hold_on_generated_trace() { + for k in [2u64, 3, 5, 7, 0xFF, 0xABCD, 1_000_003] { + let ops = ops_for(k); + assert!(!ops.is_empty(), "k={k} should have steps"); + let trace = generate_ecdas_trace(&ops); + + for row in 0..trace.num_rows() { + let view = row_view(&trace, row); + assert_eq!( + IsBitConstraint::unconditional(cols::MU, 0).evaluate(&view), + FE::zero(), + "is_bit(mu) k={k} row {row}" + ); + assert_eq!( + IsBitConstraint::unconditional(cols::NEXT_OP, 0).evaluate(&view), + FE::zero() + ); + assert_eq!( + MulZero { + a: cols::OP, + b: cols::NEXT_OP, + b_complement: false, + constraint_idx: 0 + } + .evaluate(&view), + FE::zero(), + "op·next_op k={k} row {row}" + ); + assert_eq!( + MulZero { + a: cols::NEXT_OP, + b: cols::MU, + b_complement: true, + constraint_idx: 0 + } + .evaluate(&view), + FE::zero() + ); + for relation in [Relation::Lambda, Relation::Xr, Relation::Yr] { + for i in 0..64 { + let v = ConvCarry { + relation, + i, + constraint_idx: 0, + } + .evaluate(&view); + assert_eq!(v, FE::zero(), "conv k={k} i={i} row {row}"); + } + } + for c_base in [cols::C0, cols::C1, cols::C2] { + assert_eq!( + ColIsZero { + col: c_base + 63, + constraint_idx: 0 + } + .evaluate(&view), + FE::zero() + ); + } + } + } +} + +#[test] +fn create_constraints_count() { + let (constraints, next) = create_constraints(0); + assert_eq!(constraints.len(), 199); + assert_eq!(next, 199); +} diff --git a/prover/src/tests/ecsm_tests.rs b/prover/src/tests/ecsm_tests.rs new file mode 100644 
index 000000000..bc92c4596 --- /dev/null +++ b/prover/src/tests/ecsm_tests.rs @@ -0,0 +1,194 @@ +//! Tests for the ECSM core table — constraint satisfaction on generated traces, +//! constraint count, and the yG padding-closure argument. + +use crate::constraints::templates::IsBitConstraint; +use crate::tables::ecsm::{ + CarryBit, ColIsZero, ConvCarry, EcsmOperation, OverflowKind, OverflowRequired, Relation, cols, + create_constraints, generate_ecsm_trace, +}; +use crate::tables::types::{FE, GoldilocksExtension, GoldilocksField}; +use ecsm::{P_BYTES, compute_witness}; +use stark::constraints::transition::TransitionConstraint; +use stark::table::TableView; +use stark::trace::TraceTable; + +fn gx_le() -> [u8; 32] { + // secp256k1 Gx, little-endian. + let mut be = [ + 0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + be.reverse(); + be +} + +fn k_le(v: u64) -> [u8; 32] { + let mut k = [0u8; 32]; + k[..8].copy_from_slice(&v.to_le_bytes()); + k +} + +fn op_for(k: u64) -> EcsmOperation { + let witness = compute_witness(&k_le(k), &gx_le()).unwrap(); + EcsmOperation { + timestamp: 444, + addr_xg: 0x2000, + addr_k: 0x3000, + addr_xr: 0x1000, + witness, + } +} + +fn row_view( + trace: &TraceTable, + row: usize, +) -> TableView { + let main: Vec = (0..cols::NUM_COLUMNS) + .map(|c| *trace.main_table.get(row, c)) + .collect(); + TableView::new(vec![main], vec![]) +} + +/// Every ECSM constraint evaluates to zero on a generated trace (real + padding rows). +#[test] +fn constraints_hold_on_generated_trace() { + let ops: Vec = [1u64, 2, 5, 0xFFFF, 1_000_003] + .iter() + .map(|&k| op_for(k)) + .collect(); + let trace = generate_ecsm_trace(&ops); + + for row in 0..trace.num_rows() { + let view = row_view(&trace, row); + // Re-evaluate concrete constraints (mirror create_constraints) at this row. 
+ assert_eq!( + IsBitConstraint::unconditional(cols::MU, 0).evaluate(&view), + FE::zero(), + "is_bit(mu) row {row}" + ); + for i in 0..64 { + for relation in [Relation::X2, Relation::Yg] { + let v = ConvCarry { + relation, + i, + constraint_idx: 0, + } + .evaluate(&view); + assert_eq!(v, FE::zero(), "conv carry i={i} row {row}"); + } + } + assert_eq!( + ColIsZero { + col: cols::c0(63), + constraint_idx: 0 + } + .evaluate(&view), + FE::zero() + ); + assert_eq!( + ColIsZero { + col: cols::c1(63), + constraint_idx: 0 + } + .evaluate(&view), + FE::zero() + ); + for kind in [OverflowKind::KLtN, OverflowKind::XrLtP] { + for i in 0..7 { + assert_eq!( + CarryBit { + kind, + i, + constraint_idx: 0 + } + .evaluate(&view), + FE::zero(), + "carry bit kind i={i} row {row}" + ); + } + assert_eq!( + OverflowRequired { + kind, + constraint_idx: 0 + } + .evaluate(&view), + FE::zero(), + "overflow required row {row}" + ); + } + } +} + +#[test] +fn create_constraints_count() { + let (constraints, next) = create_constraints(0); + assert_eq!(constraints.len(), 148); + assert_eq!(next, 148); +} + +/// The yG carry recurrence is unsatisfiable on a padding row unless two ingredients hold, +/// and this test locks both: +/// (a) `q1` pads to `p`, so the `p² − q1·p` offset cancels; +/// (b) the curve constant `b` is multiplied by `µ`, so it drops when `µ = 0`. +/// Removing either ingredient leaves a nonzero residual on the yG limb-0 relation. +/// The x² relation has no standalone constant, so it closes on all-zero padding and is +/// left fully unconditional. +#[test] +fn yg_padding_closes_via_q1_eq_p_and_mu_gated_b() { + // yG limb-0 ConvCarry residual on a one-off row with the given `µ` and `q1`. 
+ let yg_residual = |mu: u64, q1_is_p: bool| { + let mut main = vec![FE::zero(); cols::NUM_COLUMNS]; + main[cols::MU] = FE::from(mu); + if q1_is_p { + for (i, &b) in P_BYTES.iter().enumerate() { + main[cols::Q1 + i] = FE::from(b as u64); + } + } + let view: TableView = + TableView::new(vec![main], vec![]); + ConvCarry { + relation: Relation::Yg, + i: 0, + constraint_idx: 0, + } + .evaluate(&view) + }; + + // The padding row this chip emits (µ = 0, q1 = p): both ingredients present → closes. + assert_eq!( + yg_residual(0, true), + FE::zero(), + "padding row (µ=0, q1=p) must close" + ); + + // Drop ingredient (a): q1 = 0 instead of p → the p² offset is uncancelled. + assert_eq!( + yg_residual(0, false), + FE::zero() - FE::from(2209u64), + "without q1=p the residual is −P_0² = −47²" + ); + + // Drop ingredient (b): force the row active (µ = 1) so the curve constant `b` + // survives even with q1 = p. Residual = b = 7. + assert_eq!( + yg_residual(1, true), + FE::from(7u64), + "with µ=1 (b ungated) the leftover residual is the curve constant b=7" + ); + + // x² has no standalone constant → closes on an all-zero padding row regardless. 
+ let mut zero = vec![FE::zero(); cols::NUM_COLUMNS]; + zero[cols::MU] = FE::zero(); + let zview: TableView = TableView::new(vec![zero], vec![]); + assert_eq!( + ConvCarry { + relation: Relation::X2, + i: 0, + constraint_idx: 0, + } + .evaluate(&zview), + FE::zero(), + "x² closes on all-zero padding (no standalone constant)" + ); +} diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs index 54705f401..af1ee316f 100644 --- a/prover/src/tests/mod.rs +++ b/prover/src/tests/mod.rs @@ -31,6 +31,12 @@ pub mod disk_spill_tests; #[cfg(test)] pub mod dvrm_tests; #[cfg(test)] +pub mod ec_scalar_tests; +#[cfg(test)] +pub mod ecdas_tests; +#[cfg(test)] +pub mod ecsm_tests; +#[cfg(test)] pub mod eq_tests; #[cfg(test)] pub mod keccak_rnd_tests; diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs index 4924a0943..dd8780d74 100644 --- a/prover/src/tests/prove_elfs_tests.rs +++ b/prover/src/tests/prove_elfs_tests.rs @@ -1075,6 +1075,177 @@ fn test_prove_elfs_keccak_multi_call() { ); } +#[test] +fn test_prove_elfs_ecsm() { + let _ = env_logger::builder().is_test(true).try_init(); + + let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let executor = + executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + + // The guest computes 5·G and commits the 32-byte x-coordinate; cross-check it against + // the reference scalar multiplication. 
Gx, little-endian: + let mut gx = [ + 0x79u8, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + gx.reverse(); + let mut k = [0u8; 32]; + k[0] = 5; + let expected_xr = ecsm::scalar_mul_x(&k, &gx).unwrap(); + assert_eq!( + result.return_values.memory_values, + expected_xr.to_vec(), + "committed xR must equal x(5G)" + ); + + let mut traces = + Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap(); + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "ECSM prove/verify failed" + ); +} + +#[test] +fn test_prove_elfs_ecsm_multi() { + let _ = env_logger::builder().is_test(true).try_init(); + + let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm_multi"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let executor = + executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + + // Gx little-endian. + let mut gx = [ + 0x79u8, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + gx.reverse(); + + // The guest commits x(1·G) || x(5·G) || x(0xABCDEF·G); cross-check each 32-byte chunk. + // k=1 exercises the zero-ECDAS-steps edge; 0xABCDEF exercises many doubles + adds. 
+ let mut expected = Vec::new(); + for kv in [1u64, 5, 0xABCDEF] { + let mut k = [0u8; 32]; + k[..8].copy_from_slice(&kv.to_le_bytes()); + expected.extend_from_slice(&ecsm::scalar_mul_x(&k, &gx).unwrap()); + } + assert_eq!( + result.return_values.memory_values, expected, + "committed outputs must equal x(1G) || x(5G) || x(0xABCDEF·G)" + ); + + let mut traces = + Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap(); + assert!( + prove_and_verify_vm_minimal(&elf, &mut traces), + "ECSM multi-call prove/verify failed" + ); +} + +/// End-to-end via the **Rust-guest path**: the `syscalls::ecsm_mul` wrapper computes 5·G and +/// commits its x-coordinate. Verifies the wrapper works end-to-end (parity with the asm guest). +#[test] +fn test_prove_ecsm_rust_guest() { + let _ = env_logger::builder().is_test(true).try_init(); + + let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .to_path_buf(); + let elf_bytes = std::fs::read(workspace_root.join("executor/program_artifacts/rust/ecsm.elf")) + .expect("ecsm.elf not found — run `make compile-programs-rust`"); + + let proof = prove_vm_minimal(&elf_bytes, &[], &Default::default()); + assert!( + verify_vm_minimal(&proof, &elf_bytes), + "ecsm rust guest should verify" + ); + + // Committed output must equal x(5·G). + let mut gx = [ + 0x79u8, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B, + 0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8, + 0x17, 0x98, + ]; + gx.reverse(); + let mut k = [0u8; 32]; + k[0] = 5; + assert_eq!( + proof.public_output, + ecsm::scalar_mul_x(&k, &gx).unwrap().to_vec() + ); +} + +/// Soundness: the verifier REJECTS a forged ECSM result. +/// +/// A malicious prover must not be able to claim a wrong `k·G`. We tamper the result +/// x-coordinate `xR` in the ECSM trace (to a different valid byte). 
`xR` is bound by the +/// final ECDAS-bus tuple (the constrained double-and-add output) and by the `xR < p` +/// carry-chain check, so the forgery unbalances the buses / breaks the constraints and the +/// proof must fail to verify. +#[test] +fn test_prove_elfs_ecsm_forged_result_rejected() { + use crate::tables::ecsm::cols as ecsm_cols; + + let _ = env_logger::builder().is_test(true).try_init(); + + let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let executor = + executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + let mut traces = + Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap(); + + // Forge the low byte of xR on the (single) real ECSM row. + let orig = *traces.ecsm.main_table.get(0, ecsm_cols::xr(0)); + let forged = orig + FieldElement::::one(); + traces.ecsm.main_table.set(0, ecsm_cols::xr(0), forged); + + assert!( + !prove_and_verify_vm_minimal(&elf, &mut traces), + "Verifier must reject a forged ECSM result xR" + ); +} + +/// Verifies SPEC-1 (a spec bug) + deviation D1: `ecdas.toml` omits `IS_BIT(µ)`, but `µ` is the +/// multiplicity of every ECDAS bus interaction. The implementation adds `IS_BIT(µ)`; this test +/// confirms it is load-bearing by forging a non-boolean `µ` on a real ECDAS row and asserting +/// the verifier rejects. (k=5 produces 3 ECDAS rows.) 
+#[test] +fn test_prove_elfs_ecsm_forged_ecdas_mu_rejected() { + use crate::tables::ecdas::cols as ecdas_cols; + + let _ = env_logger::builder().is_test(true).try_init(); + + let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm"); + let elf = Elf::load(&elf_bytes).expect("Failed to load ELF"); + let executor = + executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor"); + let result = executor.run().expect("Failed to run program"); + let mut traces = + Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap(); + + // Row 0 is a real ECDAS step (µ=1); forge µ to a non-boolean value. + traces.ecdas.main_table.set( + 0, + ecdas_cols::MU, + FieldElement::::from(2u64), + ); + + assert!( + !prove_and_verify_vm_minimal(&elf, &mut traces), + "Verifier must reject a non-boolean ECDAS multiplicity (IS_BIT(µ), spec omits it)" + ); +} + /// Verifier REJECTS a forged trace where an addr byte cell is set to a /// non-byte field element. /// @@ -2260,7 +2431,7 @@ fn test_crafted_zero_count_proof_must_not_verify() { let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts, None, None); let verifier_air_refs = airs.air_refs(); - assert_eq!(verifier_air_refs.len(), 8); + assert_eq!(verifier_air_refs.len(), crate::FIXED_TABLE_COUNT); let mut bitwise_trace = crate::tables::bitwise::generate_bitwise_trace(); diff --git a/syscalls/README.md b/syscalls/README.md index fa5758741..9e972e0d0 100644 --- a/syscalls/README.md +++ b/syscalls/README.md @@ -12,6 +12,7 @@ Published as `lambda-vm-syscalls`. Intended to be used from RISC-V (RV64IM) gues | `get_private_input() -> Vec` | Read the host-supplied private input bytes (memory-mapped at `0xFF000000`). | | `sys_halt() -> !` | Terminate execution cleanly. Called automatically after `main` by the default entry point. | | `keccak_permute(state: &mut [u64; 25])` | Keccak-f[1600] permutation precompile. 
| +| `ecsm_mul(xr: &mut [u8; 32], xg: &[u8; 32], k: &[u8; 32])` | secp256k1 scalar multiplication: writes `xR = (k·G)_x` (32-byte little-endian; `0 < k < N`). | The crate also provides a default `_start` that initialises the allocator, calls `main`, and halts. diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs index e4f1d9d65..52246e465 100644 --- a/syscalls/src/syscalls.rs +++ b/syscalls/src/syscalls.rs @@ -20,6 +20,10 @@ pub enum SyscallNumbers { #[cfg(target_arch = "riscv64")] const KECCAK_SYSCALL_NUMBER: usize = usize::MAX - 1; +/// Syscall number for the ECSM secp256k1 scalar-multiply accelerator (u64::MAX - 2, = -3). +#[cfg(target_arch = "riscv64")] +const ECSM_SYSCALL_NUMBER: usize = usize::MAX - 2; + /// No-op. The `Print` ecall (a7=1) has no receiver on the Ecall bus, so emitting /// it makes the LogUp bus unbalance and the proof fail to verify. Printing isn't /// needed in provable programs, so `print_string` does nothing on every target. @@ -130,6 +134,27 @@ pub fn keccak_permute(_state: &mut [u64; 25]) { unimplemented!("syscalls are only implemented for riscv64 targets"); } +#[cfg(target_arch = "riscv64")] +/// Compute `xR = (k·G)_x` on secp256k1 via the ECSM accelerator. All values are 32-byte +/// little-endian. Requires `0 < k < N` and `xG` a valid curve x-coordinate; `xR` may alias `xG`. +pub fn ecsm_mul(xr: &mut [u8; 32], xg: &[u8; 32], k: &[u8; 32]) { + unsafe { + asm!( + "ecall", + in("a0") xr.as_mut_ptr(), // x10 = address to write xR + in("a1") xg.as_ptr(), // x11 = address of xG + in("a2") k.as_ptr(), // x12 = address of k + in("a7") ECSM_SYSCALL_NUMBER, + ) + } +} + +#[cfg(not(target_arch = "riscv64"))] +/// Compute `xR = (k·G)_x` on secp256k1 via the ECSM accelerator (32-byte little-endian values). 
+pub fn ecsm_mul(_xr: &mut [u8; 32], _xg: &[u8; 32], _k: &[u8; 32]) { + unimplemented!("syscalls are only implemented for riscv64 targets"); +} + // ============================================================================= // Stub implementations for unsupported std functions // These functions are required by Rust's std zkvm module but are not supported