diff --git a/.github/workflows/pr_main.yaml b/.github/workflows/pr_main.yaml
index 68fae4fb0..81c12d15c 100644
--- a/.github/workflows/pr_main.yaml
+++ b/.github/workflows/pr_main.yaml
@@ -213,7 +213,7 @@ jobs:
       - name: Build and archive prover + crypto tests
         run: |
           cargo nextest archive --release \
-            -p lambda-vm-prover -p stark -p crypto \
+            -p lambda-vm-prover -p stark -p crypto -p ecsm \
             --archive-file prover-tests.tar.zst
 
       - name: Upload test archive
diff --git a/Cargo.lock b/Cargo.lock
index 56f65fcf5..33fd1fb71 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -977,6 +977,15 @@ dependencies = [
  "spki",
 ]
 
+[[package]]
+name = "ecsm"
+version = "0.1.0"
+dependencies = [
+ "k256",
+ "num-bigint 0.4.6",
+ "num-traits",
+]
+
 [[package]]
 name = "educe"
 version = "0.6.0"
@@ -1327,6 +1336,7 @@ dependencies = [
 name = "executor"
 version = "0.1.0"
 dependencies = [
+ "ecsm",
  "guest_program",
  "rkyv",
  "rustc-demangle",
@@ -1982,6 +1992,7 @@ dependencies = [
  "bincode",
  "criterion 0.5.1",
  "crypto",
+ "ecsm",
  "env_logger",
  "executor",
  "log",
diff --git a/Cargo.toml b/Cargo.toml
index 2ba670c40..d64852eb7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ members = [
   "crypto/crypto",
   "crypto/math",
   "crypto/math-cuda",
+  "crypto/ecsm",
   "bin/cli",
 ]
 
diff --git a/crypto/ecsm/Cargo.toml b/crypto/ecsm/Cargo.toml
new file mode 100644
index 000000000..52eb0962e
--- /dev/null
+++ b/crypto/ecsm/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "ecsm"
+description = "secp256k1 scalar multiplication reference + ECSM accelerator witness generation"
+version = "0.1.0"
+edition = "2024"
+license.workspace = true
+
+[dependencies]
+num-bigint = "0.4.6"
+num-traits = "0.2.19"
+# Audited secp256k1 arithmetic (host-side witness generation only; never in the
+# constraint system). Used for the projective double-and-add replay + batch
+# inversion that builds the ECDAS step witnesses efficiently.
+k256 = { version = "0.13", default-features = false, features = ["arithmetic", "expose-field"] }
diff --git a/crypto/ecsm/src/curve.rs b/crypto/ecsm/src/curve.rs
new file mode 100644
index 000000000..bad7238f1
--- /dev/null
+++ b/crypto/ecsm/src/curve.rs
@@ -0,0 +1,418 @@
+//! secp256k1 curve arithmetic in affine coordinates and the chip-faithful
+//! double-and-add replay.
+//!
+//! The curve is `y^2 = x^3 + 7 mod p` (short Weierstrass with `a = 0`). The point at
+//! infinity never appears: the ECSM/ECDAS design guarantees it cannot occur for
+//! `k in [1, N)` (see `ecsm.typ` "Point at infinity" / ECDAS soundness argument), so the
+//! affine formulas below are always well defined.
+
+use num_bigint::BigUint;
+
+#[cfg(test)]
+use crate::field::Fp;
+
+/// An affine curve point. Never the point at infinity.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct AffinePoint {
+    pub x: BigUint,
+    pub y: BigUint,
+}
+
+/// Recovers the canonical (even) `y` for a given `x` such that `y^2 = x^3 + b mod p`.
+///
+/// Both `y` and `p - y` are valid; we pick the even one so the executor and prover agree
+/// deterministically. The chip never constrains the parity (it only writes back `xR`, and
+/// `k·P` and `k·(-P)` share an x-coordinate), so any consistent choice is sound.
+///
+/// Returns `None` when `x` is not a valid curve x-coordinate (`x^3 + b` is not a quadratic
+/// residue, or `x` is not a canonical field element — including any `x` wider than 256 bits).
+pub fn recover_y_canonical(x: &BigUint) -> Option<BigUint> {
+    // SEC1 compressed encoding: the `0x02` prefix selects the even-`y` root, delegated to k256.
+    // Over-wide `x` is rejected here because `be32` panics on it; byte 0 keeps the `0x02` tag.
+    let mut enc = [0x02u8; 33];
+    enc[1..33].copy_from_slice(&(x.bits() <= 256).then(|| be32(x))?);
+    let ep = EncodedPoint::from_bytes(enc).ok()?;
+    let affine: K256Affine = Option::from(K256Affine::from_encoded_point(&ep))?;
+    Some(from_k256_affine(&affine).y)
+}
+
+/// `2·a` on the curve. Requires `a.y != 0` (always true on secp256k1).
+#[cfg(test)]
+pub fn point_double(a: &AffinePoint) -> AffinePoint {
+    let x = Fp::new(a.x.clone());
+    let y = Fp::new(a.y.clone());
+    // λ = 3x² / 2y
+    let three_x2 = x.mul(&x).mul(&Fp::from_u64(3));
+    let two_y = y.add(&y);
+    let lambda = three_x2.mul(&two_y.inv());
+    // xr = λ² - 2x
+    let xr = lambda.mul(&lambda).sub(&x).sub(&x);
+    // yr = λ(x - xr) - y
+    let yr = lambda.mul(&x.sub(&xr)).sub(&y);
+    AffinePoint { x: xr.0, y: yr.0 }
+}
+
+/// `a + g` on the curve. Requires `a.x != g.x` (always true in the chip's add steps).
+#[cfg(test)]
+pub fn point_add(a: &AffinePoint, g: &AffinePoint) -> AffinePoint {
+    let xa = Fp::new(a.x.clone());
+    let ya = Fp::new(a.y.clone());
+    let xg = Fp::new(g.x.clone());
+    let yg = Fp::new(g.y.clone());
+    // λ = (yg - ya) / (xg - xa)
+    let lambda = yg.sub(&ya).mul(&xg.sub(&xa).inv());
+    // xr = λ² - xa - xg
+    let xr = lambda.mul(&lambda).sub(&xa).sub(&xg);
+    // yr = λ(xa - xr) - ya
+    let yr = lambda.mul(&xa.sub(&xr)).sub(&ya);
+    AffinePoint { x: xr.0, y: yr.0 }
+}
+
+/// One step of the double-and-add replay, at point level.
+///
+/// Mirrors a single ECDAS row: receive accumulator `a` (and base `g`), perform `op`
+/// (0 = double, 1 = add), and decide `next_op` (whether the next row is an add).
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct StepPts {
+    pub a: AffinePoint,
+    pub g: AffinePoint,
+    pub round: u8,
+    pub op: u8,
+    pub next_op: u8,
+    pub r: AffinePoint,
+    /// Slope of this step: add => (yG-yA)/(xG-xA), double => 3xA^2/(2yA).
+    /// Precomputed here (batched) so the witness builder never inverts per step.
+    pub lambda: BigUint,
+}
+
+/// Reference slope `lambda` for one step, computed in `BigUint` `F_p`.
+/// Used by the reference replay and the k256 parity test.
+#[cfg(test)]
+pub fn step_lambda(a: &AffinePoint, g: &AffinePoint, op: u8) -> BigUint {
+    let xa = Fp::new(a.x.clone());
+    let ya = Fp::new(a.y.clone());
+    if op == 1 {
+        let xg = Fp::new(g.x.clone());
+        let yg = Fp::new(g.y.clone());
+        yg.sub(&ya).mul(&xg.sub(&xa).inv()).0
+    } else {
+        let three_x2 = xa.mul(&xa).mul(&Fp::from_u64(3));
+        let two_y = ya.add(&ya);
+        three_x2.mul(&two_y.inv()).0
+    }
+}
+
+/// Zero-based position of the most significant set bit of `k` (`len_k`): bit length minus one.
+/// Requires `k >= 1`; this is only debug-asserted, and `k == 0` underflows the subtraction.
+pub fn msb_position(k: &BigUint) -> u32 {
+    debug_assert!(k > &BigUint::from(0u8));
+    (k.bits() as u32) - 1
+}
+
+/// Replays the ECDAS double-and-add sequence for `k·g`, returning every step and the
+/// final point. Test-only `BigUint` reference: the executor calls `scalar_mul_affine_x` and
+/// the prover calls `replay_double_and_add`, which the parity test pins to this replay.
+///
+/// The schedule matches the spec exactly: start with `A = g`, `round = len_k - 1`,
+/// `op = double`; a double at `round` sets `next_op` to the scalar bit at `round`
+/// (1 ⇒ the next row adds at the same round); an add forces `next_op = 0` and advances
+/// the round. The MSB itself is represented by the initial `A = g` (consumed by ECSM via
+/// the `BIT[len_k]` interaction), so it is never processed as an add here.
+#[cfg(test)]
+pub fn replay_double_and_add_reference(
+    k: &BigUint,
+    g: &AffinePoint,
+) -> (Vec<StepPts>, AffinePoint) {
+    let m = msb_position(k) as i64; // len_k
+    let mut a = g.clone();
+    let mut round: i64 = m - 1;
+    let mut op: u8 = 0; // double
+    let mut steps = Vec::new();
+
+    while round >= 0 {
+        let (r, next_op) = if op == 0 {
+            let r = point_double(&a);
+            let bit = if k.bit(round as u64) { 1u8 } else { 0u8 };
+            (r, bit)
+        } else {
+            let r = point_add(&a, g);
+            (r, 0u8)
+        };
+        steps.push(StepPts {
+            lambda: step_lambda(&a, g, op),
+            a: a.clone(),
+            g: g.clone(),
+            round: round as u8,
+            op,
+            next_op,
+            r: r.clone(),
+        });
+        let round_sent = round - (1 - next_op as i64);
+        a = r;
+        if round_sent < 0 {
+            break;
+        }
+        round = round_sent;
+        op = next_op;
+    }
+
+    (steps, a)
+}
+
+// =========================================================================
+// k256-backed fast path: projective double-and-add replay + batch inversion.
+//
+// The witness generator is untrusted (the ECDAS chip re-proves every step), so
+// any audited arithmetic is sound here. We replay the schedule in k256
+// projective coordinates (no per-op inversion), `batch_normalize` all points to
+// affine in one shot, and batch-invert the slope denominators — replacing the
+// ~2*len_k Fermat inversions of the reference with two batched inversions.
+// =========================================================================
+
+use k256::elliptic_curve::ff::PrimeField as _;
+use k256::elliptic_curve::group::Curve as _;
+use k256::elliptic_curve::sec1::{FromEncodedPoint, ToEncodedPoint};
+use k256::{AffinePoint as K256Affine, EncodedPoint, FieldElement, ProjectivePoint, Scalar};
+
+/// 32 big-endian bytes of `v`, left zero-padded. Panics if `v` does not fit in 256 bits.
+fn be32(v: &BigUint) -> [u8; 32] {
+    let b = v.to_bytes_be();
+    debug_assert!(b.len() <= 32, "value exceeds 256 bits");
+    let mut out = [0u8; 32];
+    out[32 - b.len()..].copy_from_slice(&b);
+    out
+}
+
+fn fe_from_biguint(v: &BigUint) -> FieldElement {
+    Option::from(FieldElement::from_bytes(&be32(v).into()))
+        .expect("ECSM: field element must be < p")
+}
+
+fn biguint_from_fe(f: &FieldElement) -> BigUint {
+    BigUint::from_bytes_be(&f.to_bytes())
+}
+
+fn to_k256_affine(a: &AffinePoint) -> K256Affine {
+    let ep = EncodedPoint::from_affine_coordinates(&be32(&a.x).into(), &be32(&a.y).into(), false);
+    Option::from(K256Affine::from_encoded_point(&ep)).expect("ECSM: point must be on the curve")
+}
+
+fn from_k256_affine(p: &K256Affine) -> AffinePoint {
+    let ep = p.to_encoded_point(false);
+    AffinePoint {
+        x: BigUint::from_bytes_be(ep.x().expect("ECSM: affine point has x")),
+        y: BigUint::from_bytes_be(ep.y().expect("ECSM: affine point has y")),
+    }
+}
+
+/// Montgomery batch inversion: one real `invert` for all of `xs`. Panics if any input is zero.
+fn batch_invert(xs: &[FieldElement]) -> Vec<FieldElement> {
+    let n = xs.len();
+    let mut prefix = Vec::with_capacity(n);
+    let mut acc = FieldElement::ONE;
+    for x in xs {
+        prefix.push(acc);
+        acc *= *x;
+    }
+    let mut inv =
+        Option::<FieldElement>::from(acc.invert()).expect("ECSM: batch denominator is nonzero");
+    let mut out = vec![FieldElement::ONE; n];
+    for i in (0..n).rev() {
+        out[i] = prefix[i] * inv;
+        inv *= xs[i];
+    }
+    out
+}
+
+/// The double-and-add schedule for `k`: one `(round, op, next_op)` per ECDAS row.
+/// Pure bit logic (data-independent of point values), identical control flow to
+/// the reference replay.
+fn schedule(k: &BigUint) -> Vec<(u8, u8, u8)> {
+    let m = msb_position(k) as i64;
+    let mut sched = Vec::new();
+    let mut round: i64 = m - 1;
+    let mut op: u8 = 0;
+    while round >= 0 {
+        let next_op = if op == 0 {
+            if k.bit(round as u64) { 1u8 } else { 0u8 }
+        } else {
+            0u8
+        };
+        sched.push((round as u8, op, next_op));
+        let round_sent = round - (1 - next_op as i64);
+        if round_sent < 0 {
+            break;
+        }
+        round = round_sent;
+        op = next_op;
+    }
+    sched
+}
+
+/// Executor fast path: the x-coordinate of `k·g`, via k256's optimized scalar
+/// multiplication. Needs no step list or slopes, so it skips all witness work.
+/// Requires `k` in `[1, N)` and `g` on the curve (as checked by `prepare`); panics otherwise.
+pub fn scalar_mul_affine_x(k: &BigUint, g: &AffinePoint) -> BigUint {
+    let scalar = Option::<Scalar>::from(Scalar::from_repr(be32(k).into()))
+        .expect("ECSM: scalar k must be < N");
+    let g_proj = ProjectivePoint::from(to_k256_affine(g));
+    let r = (g_proj * scalar).to_affine();
+    from_k256_affine(&r).x
+}
+
+/// Replays the ECDAS double-and-add for `k·g` using k256 projective arithmetic and
+/// batched inversion. Produces the identical `StepPts` sequence as the test-only
+/// `replay_double_and_add_reference` (validated by the parity test), but with batched
+/// inversions instead of per-step ones. Requires `k >= 1` and `g` on the curve.
+pub fn replay_double_and_add(k: &BigUint, g: &AffinePoint) -> (Vec<StepPts>, AffinePoint) {
+    let sched = schedule(k);
+    if sched.is_empty() {
+        return (Vec::new(), g.clone()); // k == 1: result is g, no steps
+    }
+    let n = sched.len();
+
+    // 1. projective replay (no inversions): record a and r at every step.
+    let g_proj = ProjectivePoint::from(to_k256_affine(g));
+    let mut a_proj = g_proj;
+    let mut points = Vec::with_capacity(2 * n); // [a_0..a_{n-1}, r_0..r_{n-1}]
+    let mut r_projs = Vec::with_capacity(n);
+    for &(_, op, _) in &sched {
+        let r_proj = if op == 0 {
+            a_proj.double()
+        } else {
+            a_proj + g_proj
+        };
+        points.push(a_proj);
+        r_projs.push(r_proj);
+        a_proj = r_proj;
+    }
+    points.extend_from_slice(&r_projs);
+
+    // 2. batch_normalize every a and r. NOTE(review): confirm k256 batches this without `alloc`.
+    let mut affine = vec![K256Affine::IDENTITY; points.len()];
+    ProjectivePoint::batch_normalize(&points, &mut affine);
+    let a_aff: Vec<AffinePoint> = affine[..n].iter().map(from_k256_affine).collect();
+    let r_aff: Vec<AffinePoint> = affine[n..].iter().map(from_k256_affine).collect();
+
+    // 3. batch-invert all slope denominators (add: xG-xA, double: 2yA).
+    let gx_fe = fe_from_biguint(&g.x);
+    let gy_fe = fe_from_biguint(&g.y);
+    let denoms: Vec<FieldElement> = (0..n)
+        .map(|i| {
+            if sched[i].1 == 1 {
+                gx_fe - fe_from_biguint(&a_aff[i].x)
+            } else {
+                let ya = fe_from_biguint(&a_aff[i].y);
+                ya + ya
+            }
+        })
+        .collect();
+    let inv_denoms = batch_invert(&denoms);
+
+    // 4. slopes and StepPts.
+    let steps: Vec<StepPts> = (0..n)
+        .map(|i| {
+            let num = if sched[i].1 == 1 {
+                gy_fe - fe_from_biguint(&a_aff[i].y)
+            } else {
+                let x2 = {
+                    let xa = fe_from_biguint(&a_aff[i].x);
+                    xa * xa
+                };
+                x2 + x2 + x2 // 3 xA^2
+            };
+            StepPts {
+                a: a_aff[i].clone(),
+                g: g.clone(),
+                round: sched[i].0,
+                op: sched[i].1,
+                next_op: sched[i].2,
+                r: r_aff[i].clone(),
+                lambda: biguint_from_fe(&(num * inv_denoms[i])),
+            }
+        })
+        .collect();
+
+    let result = r_aff[n - 1].clone();
+    (steps, result)
+}
+
+#[cfg(test)]
+mod parity_tests {
+    use super::*;
+    use crate::n;
+    use num_bigint::BigUint;
+
+    /// secp256k1 generator (even y), via the canonical y recovery.
+    fn generator() -> AffinePoint {
+        let gx = BigUint::parse_bytes(
+            b"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798",
+            16,
+        )
+        .unwrap();
+        let gy = recover_y_canonical(&gx).expect("G on curve");
+        AffinePoint { x: gx, y: gy }
+    }
+
+    fn be(hex: &[u8]) -> BigUint {
+        BigUint::parse_bytes(hex, 16).unwrap()
+    }
+
+    /// The k256 fast path must produce byte-identical `StepPts` (points + λ) and the
+    /// same final point as the BigUint reference, across small, structured, large and
+    /// near-order scalars. This pins the audited fast path to the spec-faithful reference.
+    #[test]
+    fn k256_replay_matches_reference() {
+        let g = generator();
+        let mut scalars: Vec<BigUint> = (1u64..40).map(BigUint::from).collect();
+        for &kv in &[
+            0xFFu64,
+            0x101,
+            0xABCD,
+            0xFFFF,
+            0x1_0000,
+            1 << 20,
+            123_456_789,
+            u64::MAX,
+        ] {
+            scalars.push(BigUint::from(kv));
+        }
+        // large 256-bit scalars (must stay < N) and the order boundary
+        scalars.push(be(
+            b"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF",
+        ));
+        scalars.push(be(
+            b"7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0",
+        ));
+        scalars.push(&n() / BigUint::from(2u8));
+        scalars.push(&n() - BigUint::from(1u8));
+
+        for k in scalars {
+            let (steps, result) = replay_double_and_add(&k, &g);
+            let (steps_ref, result_ref) = replay_double_and_add_reference(&k, &g);
+            assert_eq!(result, result_ref, "final point mismatch for k = {k}");
+            assert_eq!(steps, steps_ref, "step list mismatch for k = {k}");
+        }
+    }
+
+    /// The executor's fast path (`scalar_mul_affine_x`) and the prover's replay must agree
+    /// on `x(k·G)`: the executor writes it to guest memory and the prover proves it, so any
+    /// divergence would make a correct execution unprovable. They run through two distinct
+    /// k256 entry points (native scalar-mul vs projective double-and-add), so pin them here.
+    #[test]
+    fn executor_and_replay_agree_on_result_x() {
+        let g = generator();
+        let mut scalars: Vec<BigUint> = (1u64..40).map(BigUint::from).collect();
+        for &kv in &[0xFFu64, 0xABCD, 1 << 20, 123_456_789, u64::MAX] {
+            scalars.push(BigUint::from(kv));
+        }
+        scalars.push(&n() / BigUint::from(2u8));
+        scalars.push(&n() - BigUint::from(1u8));
+
+        for k in scalars {
+            let (_steps, result) = replay_double_and_add(&k, &g);
+            let exec_x = scalar_mul_affine_x(&k, &g);
+            assert_eq!(result.x, exec_x, "executor/replay x mismatch for k = {k}");
+        }
+    }
+}
diff --git a/crypto/ecsm/src/field.rs b/crypto/ecsm/src/field.rs
new file mode 100644
index 000000000..5e6c550b8
--- /dev/null
+++ b/crypto/ecsm/src/field.rs
@@ -0,0 +1,46 @@
+//! Arithmetic in the secp256k1 base field `F_p` with `p = 2^256 - 2^32 - 977`.
+//!
+//! Elements are stored as `BigUint` always reduced into `[0, p)`. This is reference
+//! arithmetic used to derive accelerator witnesses — it runs once per `ECALL`, never
+//! in a hot loop, so clarity is preferred over speed.
+
+use num_bigint::BigUint;
+
+use crate::p;
+
+/// An element of the secp256k1 base field, kept reduced into `[0, p)`.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Fp(pub BigUint);
+
+impl Fp {
+    /// Reduces an arbitrary value into the field.
+    pub fn new(v: BigUint) -> Self {
+        Fp(v % p())
+    }
+
+    pub fn from_u64(v: u64) -> Self {
+        Fp(BigUint::from(v) % p())
+    }
+
+    /// `self + other mod p`. Both operands must already be reduced.
+    pub fn add(&self, other: &Fp) -> Fp {
+        Fp((&self.0 + &other.0) % p())
+    }
+
+    /// `self - other mod p`. Both operands must already be reduced.
+    pub fn sub(&self, other: &Fp) -> Fp {
+        let t = &self.0 + p(); // in [p, 2p)
+        Fp((t - &other.0) % p())
+    }
+
+    /// `self * other mod p`. Both operands must already be reduced.
+    pub fn mul(&self, other: &Fp) -> Fp {
+        Fp((&self.0 * &other.0) % p())
+    }
+
+    /// Multiplicative inverse via Fermat's little theorem (`p` is prime): `self^(p-2)`.
+    /// Returns zero for a zero input (which never occurs for valid curve arithmetic).
+    pub fn inv(&self) -> Fp {
+        Fp(self.0.modpow(&(p() - BigUint::from(2u32)), &p()))
+    }
+}
diff --git a/crypto/ecsm/src/lib.rs b/crypto/ecsm/src/lib.rs
new file mode 100644
index 000000000..56a948a12
--- /dev/null
+++ b/crypto/ecsm/src/lib.rs
@@ -0,0 +1,252 @@
+//! Reference secp256k1 scalar multiplication and ECSM-accelerator witness generation.
+//!
+//! This crate is shared by the executor (which needs `k·G`'s x-coordinate to write back
+//! to guest memory) and the prover (which replays the full double-and-add sequence to
+//! fill the ECSM / ECDAS / EC_SCALAR trace witnesses). Both entry points compute the same
+//! `k·G` over the audited `k256` curve arithmetic — the executor via `k256`'s scalar
+//! multiplication, the prover via a projective double-and-add replay — so the x-coordinate
+//! they write/prove agrees. It is also independent of the `yG` root: both recover the same
+//! canonical `yG` in `prepare`, and `k·P` and `k·(-P)` share an x.
+//!
+//! Curve point operations are delegated to the RustCrypto `k256` crate; witness generation
+//! replays the schedule in `k256` projective coordinates and batch-inverts the slope
+//! denominators, while `num-bigint` carries the coordinate/limb representation the trace
+//! needs. All of this runs once per `ECALL`, so it is not performance critical.
+//!
+//! Curve: secp256k1, `y^2 = x^3 + 7 mod p`, `p = 2^256 - 2^32 - 977`, order `N`.
+
+pub mod curve;
+pub mod field;
+pub mod witness;
+
+use num_bigint::BigUint;
+
+pub use curve::{AffinePoint, recover_y_canonical, replay_double_and_add};
+pub use witness::{EcdasStep, EcsmWitness, compute_witness};
+
+/// secp256k1 curve coefficient `b`.
+pub const B: u64 = 7;
+
+/// Prime field modulus `p = 2^256 - 2^32 - 977`, little-endian bytes.
+pub const P_BYTES: [u8; 32] = [
+    0x2F, 0xFC, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+];
+
+/// Curve group order `N`, little-endian bytes.
+pub const N_BYTES: [u8; 32] = [
+    0x41, 0x41, 0x36, 0xD0, 0x8C, 0x5E, 0xD2, 0xBF, 0x3B, 0xA0, 0x48, 0xAF, 0xE6, 0xDC, 0xAE, 0xBA,
+    0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+];
+
+/// The prime field modulus `p` as a `BigUint`.
+pub fn p() -> BigUint {
+    BigUint::from_bytes_le(&P_BYTES)
+}
+
+/// The curve order `N` as a `BigUint`.
+pub fn n() -> BigUint {
+    BigUint::from_bytes_le(&N_BYTES)
+}
+
+/// Errors that prevent a sound ECSM witness from existing for the given inputs.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum EcsmError {
+    /// `k == 0`: `0·G` is the point at infinity, which the accelerator cannot represent.
+    ScalarIsZero,
+    /// `k >= N`: outside the valid scalar range `[1, N)`.
+    ScalarOutOfRange,
+    /// `x^3 + b` is not a quadratic residue, so `xG` is not a valid x-coordinate.
+    NotOnCurve,
+    /// `xG >= p`: not a canonical field element. Reducing it silently would
+    /// diverge from the prover, whose `xR < p` range check makes a non-canonical
+    /// input unprovable (with `k = 1` the input is echoed back as `xR`).
+    CoordinateOutOfRange,
+}
+
+impl core::fmt::Display for EcsmError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            EcsmError::ScalarIsZero => write!(f, "ECSM scalar k must be non-zero"),
+            EcsmError::ScalarOutOfRange => write!(f, "ECSM scalar k must be < N"),
+            EcsmError::NotOnCurve => write!(f, "ECSM xG is not a valid curve x-coordinate"),
+            EcsmError::CoordinateOutOfRange => write!(f, "ECSM xG must be < p"),
+        }
+    }
+}
+
+impl std::error::Error for EcsmError {}
+
+/// Converts a `BigUint` to 32 little-endian bytes (zero-padded / truncated to 32).
+pub fn to_le_32(v: &BigUint) -> [u8; 32] {
+    debug_assert!(v.bits() <= 256, "to_le_32: value exceeds 256 bits");
+    let mut bytes = v.to_bytes_le();
+    bytes.resize(32, 0);
+    let mut out = [0u8; 32];
+    out.copy_from_slice(&bytes[..32]);
+    out
+}
+
+/// Validates `k` and `xG` and recovers the base point `(xG, yG)`.
+///
+/// Shared front-end for both entry points: checks `0 < k < N` and `xG < p`, then recovers
+/// the canonical (even) `yG`, returning `NotOnCurve` when `xG` has no matching `y`.
+pub(crate) fn prepare(
+    k_le: &[u8; 32],
+    xg_le: &[u8; 32],
+) -> Result<(BigUint, AffinePoint), EcsmError> {
+    let k = BigUint::from_bytes_le(k_le);
+    if k == BigUint::from(0u8) {
+        return Err(EcsmError::ScalarIsZero);
+    }
+    if k >= n() {
+        return Err(EcsmError::ScalarOutOfRange);
+    }
+    let xg = BigUint::from_bytes_le(xg_le);
+    if xg >= p() {
+        return Err(EcsmError::CoordinateOutOfRange);
+    }
+    let yg = recover_y_canonical(&xg).ok_or(EcsmError::NotOnCurve)?;
+    Ok((k, AffinePoint { x: xg, y: yg }))
+}
+
+/// Computes the x-coordinate of `k·G` over secp256k1 from little-endian 32-byte `k` and `xG`.
+/// This is the executor's entry point: the caller writes the returned bytes back to guest
+/// memory at `addr_xR`. Fails with the `EcsmError` reported by `prepare` on invalid inputs.
+pub fn scalar_mul_x(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<[u8; 32], EcsmError> {
+    let (k, g) = prepare(k_le, xg_le)?;
+    Ok(to_le_32(&curve::scalar_mul_affine_x(&k, &g)))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Parses a big-endian hex string into a `BigUint`.
+    fn be_hex(s: &str) -> BigUint {
+        BigUint::parse_bytes(s.as_bytes(), 16).unwrap()
+    }
+
+    // secp256k1 generator G.
+    const GX_HEX: &str = "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798";
+    const GY_HEX: &str = "483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8";
+
+    fn gx() -> BigUint {
+        be_hex(GX_HEX)
+    }
+
+    #[test]
+    fn constants_match_known_secp256k1_values() {
+        assert_eq!(
+            p(),
+            be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F")
+        );
+        assert_eq!(
+            n(),
+            be_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141")
+        );
+        // p ≡ 3 mod 4 (a known secp256k1 property).
+        assert_eq!(&p() % 4u32, BigUint::from(3u8));
+    }
+
+    #[test]
+    fn generator_is_on_curve_and_y_is_canonical() {
+        // Gy ends in 0xB8 (even), so the canonical (even) root is Gy itself.
+        let y = recover_y_canonical(&gx()).expect("G is on the curve");
+        assert_eq!(y, be_hex(GY_HEX));
+        assert!(!y.bit(0), "canonical root must be even");
+    }
+
+    #[test]
+    fn recover_y_handles_residues_and_non_residues() {
+        // Roughly half of all x are non-residues; scan a small range and check both
+        // branches deterministically: every recovered y is even and on the curve, and at
+        // least one x has no valid y (the `None` path).
+        let mut saw_none = false;
+        let mut saw_some = false;
+        for x in 1u32..40 {
+            let xb = BigUint::from(x);
+            match recover_y_canonical(&xb) {
+                Some(y) => {
+                    saw_some = true;
+                    assert!(!y.bit(0), "recovered y must be even");
+                    // y^2 == x^3 + b mod p
+                    let lhs = (&y * &y) % p();
+                    let rhs = (&xb * &xb % p() * &xb + BigUint::from(B)) % p();
+                    assert_eq!(lhs, rhs);
+                }
+                None => saw_none = true,
+            }
+        }
+        assert!(
+            saw_some && saw_none,
+            "expected both residues and non-residues in range"
+        );
+    }
+
+    #[test]
+    fn scalar_mul_one_is_identity() {
+        let k = to_le_32(&BigUint::from(1u8));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), xg);
+    }
+
+    #[test]
+    fn scalar_mul_two_matches_known_2g() {
+        // x(2G) for secp256k1.
+        let expected = be_hex("C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE5");
+        let k = to_le_32(&BigUint::from(2u8));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), to_le_32(&expected));
+    }
+
+    #[test]
+    fn scalar_mul_three_matches_known_3g() {
+        let expected = be_hex("F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9");
+        let k = to_le_32(&BigUint::from(3u8));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), to_le_32(&expected));
+    }
+
+    #[test]
+    fn scalar_mul_n_minus_one_shares_x_with_g() {
+        // (N-1)·G = -G, which has the same x-coordinate as G.
+        let k = to_le_32(&(n() - BigUint::from(1u8)));
+        let xg = to_le_32(&gx());
+        assert_eq!(scalar_mul_x(&k, &xg).unwrap(), xg);
+    }
+
+    #[test]
+    fn rejects_zero_and_out_of_range_scalars() {
+        let xg = to_le_32(&gx());
+        assert_eq!(
+            scalar_mul_x(&to_le_32(&BigUint::from(0u8)), &xg),
+            Err(EcsmError::ScalarIsZero)
+        );
+        assert_eq!(
+            scalar_mul_x(&to_le_32(&n()), &xg),
+            Err(EcsmError::ScalarOutOfRange)
+        );
+    }
+
+    #[test]
+    fn rejects_non_canonical_xg() {
+        // xG = p and xG = p + 1 (the alias of x = 1) must be rejected, not
+        // silently reduced: with k = 1 the input bytes would be echoed back as
+        // xR, which the prover's xR < p range check cannot prove.
+        let k = to_le_32(&BigUint::from(1u8));
+        for delta in [0u8, 1] {
+            assert_eq!(
+                scalar_mul_x(&k, &to_le_32(&(p() + BigUint::from(delta)))),
+                Err(EcsmError::CoordinateOutOfRange),
+                "xG = p + {delta} must be rejected"
+            );
+        }
+        // p − 1 is below the bound, so it must NOT hit the canonicity check
+        // (it is not on the curve, which is a different error).
+        assert_eq!(
+            scalar_mul_x(&k, &to_le_32(&(p() - BigUint::from(1u8)))),
+            Err(EcsmError::NotOnCurve)
+        );
+    }
+}
diff --git a/crypto/ecsm/src/witness.rs b/crypto/ecsm/src/witness.rs
new file mode 100644
index 000000000..44bc49d4c
--- /dev/null
+++ b/crypto/ecsm/src/witness.rs
@@ -0,0 +1,502 @@
+//! ECSM / ECDAS witness generation.
+//!
+//! For one `ECALL`, the prover must fill the byte-limb witnesses that the ECSM and ECDAS
+//! chips constrain: the `yG` reconstruction, the scalar range data, and — per double/add
+//! step — the slope `λ`, three quotients, and three carry arrays. This module computes all
+//! of them by literally reproducing the spec's limb-convolution recurrences, so the values
+//! it emits satisfy the AIR constraints by construction.
+//!
+//! ## Limb-convolution carries
+//!
+//! Each "`x ≡ y mod p`" relation is expressed in the spec as a 512-bit integer identity
+//! `LHS − RHS = 0`, written limb-by-limb (8-bit limbs) with a chain of carries:
+//! `2^8·c_i = c_{i-1} + S_i`, `c_{-1} = 0`, closing with `c_63 = 0` (see `ecsm.typ`
+//! "Discussing the carries"). `S_i` is the coefficient of `2^{8i}` in `LHS − RHS`
+//! (a sum of byte products — the convolution — plus single-limb terms). Carries can be
+//! negative; the chip range-checks `c_i + offset` as a halfword. We reproduce the exact
+//! integer recurrence here; the prover converts the resulting integers to field elements.
+
+use num_bigint::{BigInt, BigUint};
+use num_traits::{Signed, Zero};
+
+use crate::curve::{StepPts, replay_double_and_add};
+use crate::{B, EcsmError, P_BYTES, n, p, prepare, to_le_32};
+
+/// Full ECSM-chip witness for one scalar multiplication (one ECSM row).
+#[derive(Debug, Clone)]
+pub struct EcsmWitness {
+    pub x_g: [u8; 32], // base point x-coordinate `xG` (little-endian)
+    pub y_g: [u8; 32], // base point y-coordinate `yG` (little-endian)
+    pub k: [u8; 32], // the scalar, copied verbatim from the caller's little-endian input
+    /// `x2 = xG^2 mod p`
+    pub x2: [u8; 32],
+    /// quotient `(xG^2 − x2) / p` of the `x2` relation
+    pub q0: [u8; 32],
+    /// limb carries of the `x2` relation `xG^2 − x2 − q0·p = 0`
+    pub c0: [i64; 64],
+    /// quotient for the `yG` relation, shifted by `+p` (33 bytes; byte 32 is a single bit)
+    pub q1: [u8; 33],
+    /// limb carries of the `yG` relation `yG^2 + p^2 − xG·x2 − b − q1·p = 0`
+    pub c1: [i64; 64],
+    /// `(k - N) mod 2^256`, computed as `2^256 + k − N` (valid because `k < N`)
+    pub k_sub_n: [u8; 32],
+    /// `(xR - p) mod 2^256`, computed as `2^256 + xR − p`
+    pub x_r_sub_p: [u8; 32],
+    /// zero-based bit position of the most significant set bit of `k` (0 when `k == 1`)
+    pub len_k: u8,
+    pub x_r: [u8; 32], // x-coordinate of the result `k·G` (little-endian)
+    pub y_r: [u8; 32], // y-coordinate of the result `k·G` (little-endian)
+    /// the double/add steps (one ECDAS row each; empty when `k == 1`)
+    pub steps: Vec<EcdasStep>,
+}
+
+/// Full ECDAS-chip witness for one double/add step (one ECDAS row).
+#[derive(Debug, Clone)]
+pub struct EcdasStep {
+    pub x_a: [u8; 32], // input point `A` of this step: x-coordinate
+    pub y_a: [u8; 32], // input point `A` of this step: y-coordinate
+    pub x_g: [u8; 32], // point `G` supplied to this step: x-coordinate
+    pub y_g: [u8; 32], // point `G` supplied to this step: y-coordinate
+    pub round: u8, // round index taken from the double-and-add replay
+    /// 0 = double, 1 = add
+    pub op: u8,
+    /// op-flag of the next step (1 ⇒ next row adds at this round)
+    pub next_op: u8,
+    pub lambda: [u8; 32], // slope `λ` constrained by the `λ` relation
+    pub x_r: [u8; 32], // output point `R` of this step: x-coordinate
+    pub y_r: [u8; 32], // output point `R` of this step: y-coordinate
+    /// quotient for the `λ` relation (33 bytes)
+    pub q0: [u8; 33],
+    /// quotient for the `xR` relation (33 bytes)
+    pub q1: [u8; 33],
+    /// quotient for the `yR` relation (33 bytes)
+    pub q2: [u8; 33],
+    pub c0: [i64; 64], // carries for the `λ` relation
+    pub c1: [i64; 64], // carries for the `xR` relation
+    pub c2: [i64; 64], // carries for the `yR` relation
+}
+
+// =========================================================================
+// Limb helpers
+// =========================================================================
+
+/// Widens little-endian bytes (at most 64, else panics) into 64 zero-padded `i128` limbs.
+fn ext64(bytes: &[u8]) -> [i128; 64] {
+    let mut limbs = [0i128; 64];
+    for (limb, &byte) in limbs[..bytes.len()].iter_mut().zip(bytes) {
+        *limb = i128::from(byte);
+    }
+    limbs
+}
+
+/// Coefficient `i` of the limb product `a·b`, i.e. `Σ_{j=0}^{i} a[j]·b[i-j]` (needs `i < 64`).
+fn conv(a: &[i128; 64], b: &[i128; 64], i: usize) -> i128 {
+    a[..=i]
+        .iter()
+        .zip(b[..=i].iter().rev())
+        .map(|(x, y)| x * y)
+        .sum()
+}
+
+/// Computes the 64 carries from per-limb terms via `2^8·c_i = c_{i-1} + terms_i`,
+/// `c_{-1} = 0`, asserting exact divisibility at every limb and the closing `c_63 = 0`.
+///
+/// The asserts catch transcription errors in the `terms` builders (valid inputs give exact
+/// divisibility by 256 at every limb); a carry that does not fit `i64` also panics.
+fn limb_carries(terms: &[i128; 64]) -> [i64; 64] {
+    let mut c = [0i64; 64];
+    let mut carry: i128 = 0;
+    for i in 0..64 {
+        let s = carry + terms[i];
+        assert!(s % 256 == 0, "ECSM witness: limb {i} not divisible by 256");
+        carry = s / 256;
+        c[i] = i64::try_from(carry).expect("ECSM witness: carry overflow");
+    }
+    assert!(c[63] == 0, "ECSM witness: closing carry c_63 must be 0");
+    c
+}
+
+// =========================================================================
+// Per-relation carry builders (mirror the spec TOML polys exactly)
+// =========================================================================
+
+/// Carry chain of the ECSM `x2` relation `xG^2 − x2 − q0·p = 0`.
+/// Every argument is a 64-limb, zero-extended little-endian byte array.
+fn carries_x2(xg: &[i128; 64], x2: &[i128; 64], q0: &[i128; 64], pp: &[i128; 64]) -> [i64; 64] {
+    // Limb `i` of `LHS − RHS`: square convolution, minus the `x2` limb, minus `(q0·p)_i`.
+    let limb = |i: usize| conv(xg, xg, i) - x2[i] - conv(q0, pp, i);
+    let per_limb: [i128; 64] = std::array::from_fn(limb);
+    limb_carries(&per_limb)
+}
+
+/// Carry chain of the ECSM `yG` relation `yG^2 + p^2 − xG·x2 − b − q1·p = 0`.
+fn carries_yg(
+    yg: &[i128; 64],
+    pp: &[i128; 64],
+    x2: &[i128; 64],
+    xg: &[i128; 64],
+    q1: &[i128; 64],
+    b: &[i128; 64],
+) -> [i64; 64] {
+    // The `p^2` term offsets the `+p` shift baked into `q1`, so the identity stays exact.
+    let per_limb: [i128; 64] = std::array::from_fn(|i| {
+        conv(yg, yg, i) + conv(pp, pp, i) - conv(x2, xg, i) - conv(q1, pp, i) - b[i]
+    });
+    limb_carries(&per_limb)
+}
+
+/// ECDAS `λ` relation (`op == 1` picks the add branch, any other value the double branch):
+/// `op·(λ(xG−xA) − yG + yA) + (1−op)(2λyA − 3xA²) + (r − q0)p = 0`.
+#[allow(clippy::too_many_arguments)] // one argument per limb array appearing in the relation
+fn carries_lambda(
+    op: u8,
+    lam: &[i128; 64],
+    xg: &[i128; 64],
+    xa: &[i128; 64],
+    ya: &[i128; 64],
+    yg: &[i128; 64],
+    r: &[i128; 64],
+    pp: &[i128; 64],
+    q0: &[i128; 64],
+) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        let branch = if op == 1 {
+            // add: limb i of λ·(xG − xA) + (yA − yG), i.e. Σ_j λ_j (xG_{i-j} − xA_{i-j}) + …
+            let mut s = ya[i] - yg[i];
+            for j in 0..=i {
+                s += lam[j] * (xg[i - j] - xa[i - j]);
+            }
+            s
+        } else {
+            // double: limb i of 2·λ·yA − 3·xA², i.e. Σ_j (2 λ_j yA_{i-j} − 3 xA_j xA_{i-j})
+            let mut s = 0i128;
+            for j in 0..=i {
+                s += 2 * lam[j] * ya[i - j] - 3 * xa[j] * xa[i - j];
+            }
+            s
+        };
+        terms[i] = branch + conv(r, pp, i) - conv(q0, pp, i); // + ((r − q0)·p)_i
+    }
+    limb_carries(&terms)
+}
+
+/// ECDAS `xR` relation (when `op == 0` the `(1−op)` term turns `−xA − xG` into `−2xA`):
+/// `λ² − xA − xG − xR − (1−op)(xA − xG) + (r − q1)p = 0`.
+#[allow(clippy::too_many_arguments)] // one argument per limb array appearing in the relation
+fn carries_xr(
+    op: u8,
+    lam: &[i128; 64],
+    xa: &[i128; 64],
+    xg: &[i128; 64],
+    xr: &[i128; 64],
+    r: &[i128; 64],
+    pp: &[i128; 64],
+    q1: &[i128; 64],
+) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        let op_term = if op == 0 { xa[i] - xg[i] } else { 0 }; // ((1−op)·(xA − xG))_i
+        terms[i] =
+            conv(lam, lam, i) - xa[i] - xg[i] - xr[i] - op_term + conv(r, pp, i) - conv(q1, pp, i);
+    }
+    limb_carries(&terms)
+}
+
+/// ECDAS `yR` relation (independent of `op`): `λ(xA − xR) − yA − yR + (r − q2)p = 0`.
+#[allow(clippy::too_many_arguments)] // one argument per limb array appearing in the relation
+fn carries_yr(
+    lam: &[i128; 64],
+    xa: &[i128; 64],
+    xr: &[i128; 64],
+    ya: &[i128; 64],
+    yr: &[i128; 64],
+    r: &[i128; 64],
+    pp: &[i128; 64],
+    q2: &[i128; 64],
+) -> [i64; 64] {
+    let mut terms = [0i128; 64];
+    for i in 0..64 {
+        let mut conv_lam = 0i128; // accumulates limb i of λ·(xA − xR)
+        for j in 0..=i {
+            conv_lam += lam[j] * (xa[i - j] - xr[i - j]);
+        }
+        terms[i] = conv_lam - ya[i] - yr[i] + conv(r, pp, i) - conv(q2, pp, i);
+    }
+    limb_carries(&terms)
+}
+
+// =========================================================================
+// BigInt helpers
+// =========================================================================
+
+/// Encodes `v` as exactly 33 little-endian bytes; panics if `v` needs more than 264 bits.
+fn to_le_33(v: &BigUint) -> [u8; 33] {
+    let significant = v.to_bytes_le();
+    let used = significant.len();
+    assert!(used <= 33, "ECSM witness: quotient exceeds 33 bytes");
+    let mut padded = [0u8; 33];
+    padded[..used].copy_from_slice(&significant);
+    padded
+}
+
+/// Returns `r + numerator / p` as an unsigned value. Panics unless `p` divides `numerator`
+/// exactly and the shifted quotient is non-negative (the spec quotient ranges ensure both).
+fn shifted_quotient(numerator: &BigInt, p_big: &BigInt, r_big: &BigInt) -> BigUint {
+    let remainder = numerator % p_big;
+    assert!(
+        remainder.is_zero(),
+        "ECSM witness: numerator not divisible by p"
+    );
+    let shifted = r_big + numerator / p_big;
+    if shifted.is_negative() {
+        panic!("ECSM witness: quotient unexpectedly negative");
+    }
+    shifted.to_biguint().expect("non-negative")
+}
+
+// =========================================================================
+// Witness construction
+// =========================================================================
+
+/// Computes the full ECSM/ECDAS witness for `k·G` over secp256k1, given `k` and `xG` as
+/// little-endian 32-byte values. This is the prover's entry point; errors come from `prepare`.
+pub fn compute_witness(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<EcsmWitness, EcsmError> {
+    let (k, g) = prepare(k_le, xg_le)?;
+
+    let p_big = BigInt::from(p());
+    let r_big = BigInt::from(BigUint::from(3u8) * p()); // r = 3p
+    let r_bytes_33 = to_le_33(&(BigUint::from(3u8) * p()));
+
+    // Constants zero-extended to 64 limbs for the limb convolutions.
+    let pp = ext64(&P_BYTES);
+    let r_ext = ext64(&r_bytes_33);
+    let b_bytes = {
+        let mut a = [0u8; 32];
+        a[0] = B as u8;
+        a
+    };
+    let b_ext = ext64(&b_bytes);
+
+    // --- ECSM: x2 = xG^2 mod p, quotient q0 ---
+    let xg_sq = &g.x * &g.x;
+    let x2_big = &xg_sq % p();
+    let q0_big = (&xg_sq - &x2_big) / p(); // exact: x2 is the remainder
+    let xg_b = to_le_32(&g.x);
+    let yg_b = to_le_32(&g.y);
+    let x2_b = to_le_32(&x2_big);
+    let q0_b = to_le_32(&q0_big);
+    let c0 = carries_x2(&ext64(&xg_b), &ext64(&x2_b), &ext64(&q0_b), &pp);
+
+    // --- ECSM: yG relation, quotient q1 = (yG^2 − xG·x2 − b)/p + p ---
+    let num_yg = BigInt::from(&g.y * &g.y) - BigInt::from(&g.x * &x2_big) - BigInt::from(B);
+    let q1_big = shifted_quotient(&num_yg, &p_big, &p_big);
+    let q1_b = to_le_33(&q1_big);
+    let c1 = carries_yg(
+        &ext64(&yg_b),
+        &pp,
+        &ext64(&x2_b),
+        &ext64(&xg_b),
+        &ext64(&q1_b),
+        &b_ext,
+    );
+
+    // --- scalar range data ---
+    let len_k = crate::curve::msb_position(&k) as u8;
+    let two_256 = BigUint::from(1u8) << 256u32;
+    let k_sub_n = to_le_32(&((&two_256 + &k) - n())); // k < N ⇒ (k − N) mod 2^256
+
+    // --- double/add replay (one point-level step per ECDAS row) ---
+    let (steps_pts, result) = replay_double_and_add(&k, &g);
+    let x_r = to_le_32(&result.x);
+    let y_r = to_le_32(&result.y);
+    let x_r_sub_p = to_le_32(&((&two_256 + &result.x) - p())); // (xR − p) mod 2^256
+
+    let steps = steps_pts
+        .iter()
+        .map(|s| build_step(s, &p_big, &r_big, &r_ext, &pp))
+        .collect();
+
+    Ok(EcsmWitness {
+        x_g: xg_b,
+        y_g: yg_b,
+        k: *k_le,
+        x2: x2_b,
+        q0: q0_b,
+        c0,
+        q1: q1_b,
+        c1,
+        k_sub_n,
+        x_r_sub_p,
+        len_k,
+        x_r,
+        y_r,
+        steps,
+    })
+}
+
+/// Builds one ECDAS row (λ bytes, three shifted quotients, three carry arrays) from a replay step.
+fn build_step(
+    s: &StepPts,
+    p_big: &BigInt,
+    r_big: &BigInt,
+    r_ext: &[i128; 64],
+    pp: &[i128; 64],
+) -> EcdasStep {
+    // λ is precomputed (batched) during the double-and-add replay.
+    let lam_b = to_le_32(&s.lambda);
+    let xa_b = to_le_32(&s.a.x);
+    let ya_b = to_le_32(&s.a.y);
+    let xg_b = to_le_32(&s.g.x);
+    let yg_b = to_le_32(&s.g.y);
+    let xr_b = to_le_32(&s.r.x);
+    let yr_b = to_le_32(&s.r.y);
+
+    let (lam_ext, xa_ext, ya_ext, xg_ext, yg_ext, xr_ext, yr_ext) = (
+        ext64(&lam_b),
+        ext64(&xa_b),
+        ext64(&ya_b),
+        ext64(&xg_b),
+        ext64(&yg_b),
+        ext64(&xr_b),
+        ext64(&yr_b),
+    );
+
+    let lam_i = BigInt::from(s.lambda.clone());
+    let xa_i = BigInt::from(s.a.x.clone());
+    let ya_i = BigInt::from(s.a.y.clone());
+    let xg_i = BigInt::from(s.g.x.clone());
+    let yg_i = BigInt::from(s.g.y.clone());
+    let xr_i = BigInt::from(s.r.x.clone());
+    let yr_i = BigInt::from(s.r.y.clone());
+
+    // q0: λ relation numerator (add: λ(xG − xA) − yG + yA; double: 2λyA − 3xA²).
+    let num0 = if s.op == 1 {
+        (&xg_i - &xa_i) * &lam_i - &yg_i + &ya_i
+    } else {
+        2 * &lam_i * &ya_i - 3 * &xa_i * &xa_i
+    };
+    let q0_big = shifted_quotient(&num0, p_big, r_big);
+    let q0_b = to_le_33(&q0_big);
+
+    // q1: xR relation numerator  λ² − xA − xG − xR + (1−op)(xG − xA).
+    let mut num1 = &lam_i * &lam_i - &xa_i - &xg_i - &xr_i;
+    if s.op == 0 {
+        num1 += &xg_i - &xa_i;
+    }
+    let q1_big = shifted_quotient(&num1, p_big, r_big);
+    let q1_b = to_le_33(&q1_big);
+
+    // q2: yR relation numerator  λ(xA − xR) − yA − yR.
+    let num2 = &lam_i * (&xa_i - &xr_i) - &ya_i - &yr_i;
+    let q2_big = shifted_quotient(&num2, p_big, r_big);
+    let q2_b = to_le_33(&q2_big);
+
+    let c0 = carries_lambda(
+        s.op,
+        &lam_ext,
+        &xg_ext,
+        &xa_ext,
+        &ya_ext,
+        &yg_ext,
+        r_ext,
+        pp,
+        &ext64(&q0_b),
+    );
+    let c1 = carries_xr(
+        s.op,
+        &lam_ext,
+        &xa_ext,
+        &xg_ext,
+        &xr_ext,
+        r_ext,
+        pp,
+        &ext64(&q1_b),
+    );
+    let c2 = carries_yr(
+        &lam_ext,
+        &xa_ext,
+        &xr_ext,
+        &ya_ext,
+        &yr_ext,
+        r_ext,
+        pp,
+        &ext64(&q2_b),
+    );
+
+    EcdasStep {
+        x_a: xa_b,
+        y_a: ya_b,
+        x_g: xg_b,
+        y_g: yg_b,
+        round: s.round,
+        op: s.op,
+        next_op: s.next_op,
+        lambda: lam_b,
+        x_r: xr_b,
+        y_r: yr_b,
+        q0: q0_b,
+        q1: q1_b,
+        q2: q2_b,
+        c0,
+        c1,
+        c2,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::scalar_mul_x;
+
+    /// Little-endian bytes of the secp256k1 generator's x-coordinate.
+    fn gx_le() -> [u8; 32] {
+        // Parsed from the conventional big-endian hex form, then re-encoded as
+        // little-endian bytes.
+        const GX_HEX: &[u8] = b"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798";
+        let generator_x = BigUint::parse_bytes(GX_HEX, 16).unwrap();
+        to_le_32(&generator_x)
+    }
+
+    /// Runs `compute_witness` over a spread of scalars; its internal asserts vet every
+    /// carry and quotient, and the outputs are compared with the reference scalar mul.
+    #[test]
+    fn witness_is_self_consistent_for_many_scalars() {
+        let base_x = gx_le();
+        // small scalars plus bit patterns that exercise add/double scheduling
+        let scalars = [1u64, 2, 3, 4, 5, 7, 8, 0xFF, 0x101, 0xABCD, 0xFFFF, 123456789];
+        for kv in scalars {
+            let scalar_le = to_le_32(&BigUint::from(kv));
+            let witness = compute_witness(&scalar_le, &base_x).expect("witness");
+            let reference_x = scalar_mul_x(&scalar_le, &base_x).unwrap();
+            assert_eq!(witness.x_r, reference_x, "k = {kv}");
+            let msb_index = 63 - kv.leading_zeros();
+            assert_eq!(witness.len_k as u32, msb_index, "k = {kv}");
+        }
+    }
+
+    #[test]
+    fn k_one_has_no_ecdas_steps() {
+        let one = to_le_32(&BigUint::from(1u8));
+        let witness = compute_witness(&one, &gx_le()).unwrap();
+        assert!(witness.steps.is_empty());
+        assert_eq!((witness.x_r, witness.len_k), (witness.x_g, 0)); // 1·G = G, MSB at bit 0
+    }
+
+    #[test]
+    fn ecdas_step_schedule_matches_double_and_add() {
+        // k = 5 = 0b101: double(G)->2G [bit1=0], double(2G)->4G [bit0=1], add(4G,G)->5G.
+        let five = to_le_32(&BigUint::from(5u8));
+        let w = compute_witness(&five, &gx_le()).unwrap();
+        let ops: Vec<_> = w.steps.iter().map(|s| (s.round, s.op, s.next_op)).collect();
+        assert_eq!((w.len_k, ops), (2, vec![(1, 0, 0), (0, 0, 1), (0, 1, 0)]));
+    }
+
+    #[test]
+    fn witness_works_near_curve_order() {
+        let base_x = gx_le();
+        let witness = compute_witness(&to_le_32(&(n() - BigUint::from(1u8))), &base_x).unwrap();
+        // (N-1)·G = -G shares x with G; the test also pins len_k to 255 for this scalar.
+        assert_eq!((witness.x_r, witness.len_k), (base_x, 255));
+    }
+}
diff --git a/executor/Cargo.toml b/executor/Cargo.toml
index d03fcd15c..280d3ba6b 100644
--- a/executor/Cargo.toml
+++ b/executor/Cargo.toml
@@ -7,6 +7,7 @@ license.workspace = true
 [dependencies]
 thiserror = "1.0.68"
 rustc-demangle = "0.1"
+ecsm = { path = "../crypto/ecsm" }
 
 [dev-dependencies]
 serde = { version = "1.0", features = ["derive"] }
diff --git a/executor/programs/asm/test_ecsm.s b/executor/programs/asm/test_ecsm.s
new file mode 100644
index 000000000..670eae487
--- /dev/null
+++ b/executor/programs/asm/test_ecsm.s
@@ -0,0 +1,46 @@
+	.attribute	5, "rv64i2p1_m2p0_zmmul1p0"
+.Lfunc_end0:
+	.globl	main
+main:
+	# Stack frame (96 bytes): xG at sp+0..31, k at sp+32..63, xR at sp+64..95.
+	addi	sp, sp, -96
+
+	# xG = secp256k1 Gx, little-endian: least-significant doubleword is stored first.
+	li	t0, 0x59F2815B16F81798
+	sd	t0, 0(sp)
+	li	t0, 0x029BFCDB2DCE28D9
+	sd	t0, 8(sp)
+	li	t0, 0x55A06295CE870B07
+	sd	t0, 16(sp)
+	li	t0, 0x79BE667EF9DCBBAC
+	sd	t0, 24(sp)
+
+	# k = 5 (little-endian, upper three doublewords zero); exercises double, double, add.
+	li	t0, 5
+	sd	t0, 32(sp)
+	sd	zero, 40(sp)
+	sd	zero, 48(sp)
+	sd	zero, 56(sp)
+
+	# ECSM ecall: a0 = &xR, a1 = &xG, a2 = &k, a7 = -3.
+	addi	a0, sp, 64
+	addi	a1, sp, 0
+	addi	a2, sp, 32
+	li	a7, -3
+	ecall
+
+	# Commit the 32-byte result xR so the test can check it equals x(5G).
+	# Commit syscall: a0 = fd(1), a1 = buf_addr, a2 = count, a7 = 64.
+	li	a0, 1
+	addi	a1, sp, 64
+	li	a2, 32
+	li	a7, 64
+	ecall
+
+	# Restore the stack pointer and halt: a0 = 0, a7 = 93.
+	addi	sp, sp, 96
+	li	a0, 0
+	li	a7, 93
+	ecall
+.Lfunc_end1:
+	.size	main, .Lfunc_end1-main
diff --git a/executor/programs/asm/test_ecsm_multi.s b/executor/programs/asm/test_ecsm_multi.s
new file mode 100644
index 000000000..67aff4021
--- /dev/null
+++ b/executor/programs/asm/test_ecsm_multi.s
@@ -0,0 +1,71 @@
+	.attribute	5, "rv64i2p1_m2p0_zmmul1p0"
+.Lfunc_end0:
+	.globl	main
+main:
+	# Stack frame (96 bytes): xG at sp+0..31, k at sp+32..63, xR at sp+64..95.
+	addi	sp, sp, -96
+
+	# xG = secp256k1 Gx, little-endian, low doubleword first (written once; reused by all calls).
+	li	t0, 0x59F2815B16F81798
+	sd	t0, 0(sp)
+	li	t0, 0x029BFCDB2DCE28D9
+	sd	t0, 8(sp)
+	li	t0, 0x55A06295CE870B07
+	sd	t0, 16(sp)
+	li	t0, 0x79BE667EF9DCBBAC
+	sd	t0, 24(sp)
+
+	# k's upper three doublewords stay zero for all calls; only the lowest (sp+32) changes.
+	sd	zero, 40(sp)
+	sd	zero, 48(sp)
+	sd	zero, 56(sp)
+
+	# --- call 1: k = 1 (no ECDAS steps; start/final tuples cancel directly) ---
+	li	t0, 1
+	sd	t0, 32(sp)
+	addi	a0, sp, 64	# a0 = &xR (ECSM output buffer)
+	addi	a1, sp, 0	# a1 = &xG
+	addi	a2, sp, 32	# a2 = &k
+	li	a7, -3	# ECSM ecall
+	ecall
+	li	a0, 1	# commit: a0 = 1
+	addi	a1, sp, 64	# commit: a1 = &xR
+	li	a2, 32	# commit: a2 = 32 bytes
+	li	a7, 64	# commit ecall
+	ecall
+
+	# --- call 2: k = 5 (double, double, add) ---
+	li	t0, 5
+	sd	t0, 32(sp)
+	addi	a0, sp, 64	# a0 = &xR (ECSM output buffer)
+	addi	a1, sp, 0	# a1 = &xG
+	addi	a2, sp, 32	# a2 = &k
+	li	a7, -3	# ECSM ecall
+	ecall
+	li	a0, 1	# commit: a0 = 1
+	addi	a1, sp, 64	# commit: a1 = &xR
+	li	a2, 32	# commit: a2 = 32 bytes
+	li	a7, 64	# commit ecall
+	ecall
+
+	# --- call 3: k = 0xABCDEF (24-bit; many doubles + several adds) ---
+	li	t0, 0xABCDEF
+	sd	t0, 32(sp)
+	addi	a0, sp, 64	# a0 = &xR (ECSM output buffer)
+	addi	a1, sp, 0	# a1 = &xG
+	addi	a2, sp, 32	# a2 = &k
+	li	a7, -3	# ECSM ecall
+	ecall
+	li	a0, 1	# commit: a0 = 1
+	addi	a1, sp, 64	# commit: a1 = &xR
+	li	a2, 32	# commit: a2 = 32 bytes
+	li	a7, 64	# commit ecall
+	ecall
+
+	# Restore the stack pointer and halt: a0 = 0, a7 = 93.
+	addi	sp, sp, 96
+	li	a0, 0
+	li	a7, 93
+	ecall
+.Lfunc_end1:
+	.size	main, .Lfunc_end1-main
diff --git a/executor/programs/rust/ecsm/.cargo/config.toml b/executor/programs/rust/ecsm/.cargo/config.toml
new file mode 100644
index 000000000..ca99a3f45
--- /dev/null
+++ b/executor/programs/rust/ecsm/.cargo/config.toml
@@ -0,0 +1,5 @@
+[target.riscv64im-lambda-vm-elf]
+rustflags = [
+  "--cfg", "getrandom_backend=\"custom\"",
+  "-C", "passes=lower-atomic"
+]
diff --git a/executor/programs/rust/ecsm/Cargo.lock b/executor/programs/rust/ecsm/Cargo.lock
new file mode 100644
index 000000000..d0e71eeb0
--- /dev/null
+++ b/executor/programs/rust/ecsm/Cargo.lock
@@ -0,0 +1,331 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "base64"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "const-default"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa"
+
+[[package]]
+name = "critical-section"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
+
+[[package]]
+name = "ecsm"
+version = "0.1.0"
+dependencies = [
+ "lambda-vm-syscalls",
+]
+
+[[package]]
+name = "embedded-alloc"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd"
+dependencies = [
+ "const-default",
+ "critical-section",
+ "linked_list_allocator",
+ "rlsf",
+]
+
+[[package]]
+name = "embedded-hal"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
+[[package]]
+name = "lambda-vm-syscalls"
+version = "0.1.0"
+dependencies = [
+ "embedded-alloc",
+ "getrandom 0.2.17",
+ "getrandom 0.3.4",
+ "lazy_static",
+ "rand",
+ "riscv",
+ "thiserror",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "linked_list_allocator"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b23ac50abb8261cb38c6e2a7192d3302e0836dac1628f6a93b82b4fad185897"
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "riscv"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05cfa3f7b30c84536a9025150d44d26b8e1cc20ddf436448d74cd9591eefb25"
+dependencies = [
+ "critical-section",
+ "embedded-hal",
+ "paste",
+ "riscv-macros",
+ "riscv-pac",
+]
+
+[[package]]
+name = "riscv-macros"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d323d13972c1b104aa036bc692cd08b822c8bbf23d79a27c526095856499799"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "riscv-pac"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
+
+[[package]]
+name = "rlsf"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1646a59a9734b8b7a0ac51689388a60fe1625d4b956348e9de07591a1478457a"
+dependencies = [
+ "cfg-if",
+ "const-default",
+ "libc",
+ "rustversion",
+ "svgbobdoc",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "svgbobdoc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50"
+dependencies = [
+ "base64",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "unicode-width",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.3+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
+[[package]]
+name = "zerocopy"
+version = "0.8.51"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e5361301a1d9e5dd94c524eb99365fbaed5b237e831d7f45e2ddea11ffe8627"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.51"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "422033a2245cb4b6ff8def11b2dfaf184a2ab2573f5af28082a163a68889af0e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
diff --git a/executor/programs/rust/ecsm/Cargo.toml b/executor/programs/rust/ecsm/Cargo.toml
new file mode 100644
index 000000000..c99ea4e06
--- /dev/null
+++ b/executor/programs/rust/ecsm/Cargo.toml
@@ -0,0 +1,9 @@
+[workspace]
+
+[package]
+name = "ecsm"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+lambda-vm-syscalls = { path = "../../../../syscalls" }
diff --git a/executor/programs/rust/ecsm/src/main.rs b/executor/programs/rust/ecsm/src/main.rs
new file mode 100644
index 000000000..709d4a4ae
--- /dev/null
+++ b/executor/programs/rust/ecsm/src/main.rs
@@ -0,0 +1,20 @@
+use lambda_vm_syscalls as syscalls;
+
+/// Guest entry point: asks the ECSM precompile for 5·G on secp256k1 and commits the 32-byte
+/// x-coordinate of the result as public output.
+pub fn main() {
+    // The precompile takes little-endian bytes; Gx is written big-endian and flipped below.
+    let mut base_x: [u8; 32] = [
+        0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    base_x.reverse();
+    // Scalar k = 5, little-endian: only the first (least-significant) byte is non-zero.
+    let mut scalar = [0u8; 32];
+    scalar[0] = 5;
+
+    let mut result_x = [0u8; 32];
+    syscalls::syscalls::ecsm_mul(&mut result_x, &base_x, &scalar);
+    syscalls::syscalls::commit(&result_x);
+}
diff --git a/executor/src/tests/ecsm_tests.rs b/executor/src/tests/ecsm_tests.rs
new file mode 100644
index 000000000..486865a64
--- /dev/null
+++ b/executor/src/tests/ecsm_tests.rs
@@ -0,0 +1,151 @@
+//! Tests for the ECSM (elliptic-curve scalar multiplication) syscall.
+
+use crate::vm::instruction::decoding::Instruction;
+use crate::vm::instruction::execution::{ECSM_SYSCALL_NUMBER, ExecutionError};
+use crate::vm::memory::Memory;
+use crate::vm::registers::Registers;
+
+/// secp256k1 generator x-coordinate, little-endian.
+fn gx_le() -> [u8; 32] {
+    let mut be = [
+        0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    be.reverse();
+    be
+}
+
+fn write_u256_le(memory: &mut Memory, addr: u64, bytes: &[u8; 32]) {
+    for i in 0..4 {
+        let mut dw = [0u8; 8];
+        dw.copy_from_slice(&bytes[i * 8..i * 8 + 8]);
+        memory
+            .store_doubleword(addr + (i as u64) * 8, u64::from_le_bytes(dw))
+            .unwrap();
+    }
+}
+
+fn read_u256_le(memory: &Memory, addr: u64) -> [u8; 32] {
+    let mut out = [0u8; 32];
+    for i in 0..4 {
+        let dw = memory.load_doubleword(addr + (i as u64) * 8).unwrap();
+        out[i * 8..i * 8 + 8].copy_from_slice(&dw.to_le_bytes());
+    }
+    out
+}
+
+/// Runs the ECSM syscall with the given scalar (as little-endian bytes) and `xG`,
+/// returning the `xR` written back to memory.
+fn run_ecsm(k_le: &[u8; 32], xg_le: &[u8; 32]) -> Result<[u8; 32], ExecutionError> {
+    let mut pc = 0;
+    let mut registers = Registers::default();
+    let mut memory = Memory::default();
+
+    let addr_xr = 0x1000u64;
+    let addr_xg = 0x2000u64;
+    let addr_k = 0x3000u64;
+    write_u256_le(&mut memory, addr_xg, xg_le);
+    write_u256_le(&mut memory, addr_k, k_le);
+
+    registers.write(17, ECSM_SYSCALL_NUMBER).unwrap();
+    registers.write(10, addr_xr).unwrap();
+    registers.write(11, addr_xg).unwrap();
+    registers.write(12, addr_k).unwrap();
+
+    Instruction::EcallEbreak.run(&mut pc, &mut registers, &mut memory)?;
+    Ok(read_u256_le(&memory, addr_xr))
+}
+
+fn k_le(v: u64) -> [u8; 32] {
+    let mut k = [0u8; 32];
+    k[..8].copy_from_slice(&v.to_le_bytes());
+    k
+}
+
+#[test]
+fn ecsm_syscall_writes_correct_result() {
+    let xg = gx_le();
+    // 1·G = G
+    assert_eq!(run_ecsm(&k_le(1), &xg).unwrap(), xg);
+    // Matches the reference scalar multiplication for several scalars.
+    for v in [2u64, 3, 5, 0xFFFF, 1_000_003] {
+        assert_eq!(
+            run_ecsm(&k_le(v), &xg).unwrap(),
+            ecsm::scalar_mul_x(&k_le(v), &xg).unwrap(),
+            "k = {v}"
+        );
+    }
+}
+
+#[test]
+fn ecsm_syscall_rejects_zero_scalar() {
+    let err = run_ecsm(&k_le(0), &gx_le()).unwrap_err();
+    assert!(matches!(
+        err,
+        ExecutionError::Ecsm(ecsm::EcsmError::ScalarIsZero)
+    ));
+}
+
+#[test]
+fn ecsm_syscall_rejects_non_canonical_xg() {
+    // xG = p + 1 (the alias of x = 1) must error, not silently reduce: with
+    // k = 1 the executor would echo the non-canonical bytes back as xR, which
+    // the prover's xR < p range check cannot prove.
+    let mut xg = ecsm::P_BYTES;
+    xg[0] += 1; // p ends in 0x2F little-endian, so no carry
+    let err = run_ecsm(&k_le(1), &xg).unwrap_err();
+    assert!(matches!(
+        err,
+        ExecutionError::Ecsm(ecsm::EcsmError::CoordinateOutOfRange)
+    ));
+}
+
+/// Runs the ECSM syscall with caller-chosen operand addresses, `xG = Gx` and `k = 5`.
+fn run_ecsm_at(addr_xr: u64, addr_xg: u64, addr_k: u64) -> Result<(), ExecutionError> {
+    let mut pc = 0;
+    let mut registers = Registers::default();
+    let mut memory = Memory::default();
+    write_u256_le(&mut memory, addr_xg, &gx_le());
+    write_u256_le(&mut memory, addr_k, &k_le(5));
+    registers.write(17, ECSM_SYSCALL_NUMBER).unwrap();
+    registers.write(10, addr_xr).unwrap();
+    registers.write(11, addr_xg).unwrap();
+    registers.write(12, addr_k).unwrap();
+    Instruction::EcallEbreak.run(&mut pc, &mut registers, &mut memory)?;
+    Ok(())
+}
+
+#[test]
+fn ecsm_syscall_rejects_overlapping_xg_k() {
+    // xG and k are read at the same proof timestamp, so overlapping ranges
+    // would make the trace unprovable — the executor must reject them upfront.
+    for addr_k in [0x2000u64, 0x2008, 0x2018, 0x1FE8] {
+        let err = run_ecsm_at(0x1000, 0x2000, addr_k).unwrap_err();
+        assert!(
+            matches!(err, ExecutionError::EcsmOperandOverlap),
+            "addr_k = {addr_k:#x} overlaps addr_xg and must be rejected"
+        );
+    }
+    // Touching-but-disjoint ranges are fine (boundary: |diff| = 32)...
+    run_ecsm_at(0x1000, 0x2000, 0x2020).expect("disjoint k above xG must run");
+    run_ecsm_at(0x1000, 0x2000, 0x1FE0).expect("disjoint k below xG must run");
+    // ...and xR may alias xG (its accesses are offset to later timestamps).
+    run_ecsm_at(0x2000, 0x2000, 0x3000).expect("xR aliasing xG is allowed");
+}
+
+#[test]
+fn ecsm_syscall_rejects_address_overflow() {
+    // addr_k near the lower-limb boundary so (addr mod 2^32) + 31 overflows.
+    let mut pc = 0;
+    let mut registers = Registers::default();
+    let mut memory = Memory::default();
+    registers.write(17, ECSM_SYSCALL_NUMBER).unwrap();
+    registers.write(10, 0x1000).unwrap();
+    registers.write(11, 0x2000).unwrap();
+    registers.write(12, 0xFFFF_FFF0).unwrap(); // (mod 2^32) + 31 ≥ 2^32
+    let err = Instruction::EcallEbreak
+        .run(&mut pc, &mut registers, &mut memory)
+        .unwrap_err();
+    assert!(matches!(err, ExecutionError::EcsmAddressOverflow));
+}
diff --git a/executor/src/tests/mod.rs b/executor/src/tests/mod.rs
index 448a05dee..456607433 100644
--- a/executor/src/tests/mod.rs
+++ b/executor/src/tests/mod.rs
@@ -1,3 +1,4 @@
+pub mod ecsm_tests;
 pub mod flamegraph_tests;
 pub mod keccak_tests;
 pub mod memory_tests;
diff --git a/executor/src/vm/instruction/execution.rs b/executor/src/vm/instruction/execution.rs
index d9b0e1c8d..0922a878e 100644
--- a/executor/src/vm/instruction/execution.rs
+++ b/executor/src/vm/instruction/execution.rs
@@ -1,7 +1,7 @@
 use crate::vm::{
     instruction::decoding::{ArithOp, Comparison, Instruction, LoadStoreWidth},
     logs::Log,
-    memory::Memory,
+    memory::{Memory, MemoryError},
     registers::Registers,
 };
 
@@ -14,6 +14,8 @@ pub enum SyscallNumbers {
     Panic = 2,
     Commit = 64,
     Halt = 93,
+    // Placeholder discriminant; `TryFrom<u64>` yields this variant only for ECSM_SYSCALL_NUMBER.
+    Ecsm = 94,
 }
 
 /// Syscall number for KeccakPermute (u64::MAX - 1 = 0xFFFF_FFFF_FFFF_FFFE).
@@ -22,6 +24,17 @@ pub enum SyscallNumbers {
 pub const KECCAK_SYSCALL_NUMBER: u64 = u64::MAX - 1;
 const KECCAK_STATE_BYTES: u64 = 25 * 8;
 
+/// Syscall number for the ECSM (elliptic-curve scalar multiply) accelerator.
+///
+/// The spec uses ECALL number `-3`; interpreted as an unsigned 64-bit value that is
+/// `u64::MAX - 2 = 0xFFFF_FFFF_FFFF_FFFD`, which the ECSM core table puts on the `Ecall`
+/// bus as `[lo32, hi32] = [2^32 - 3, 2^32 - 1]`.
+pub const ECSM_SYSCALL_NUMBER: u64 = u64::MAX - 2;
+
+/// `2^32`. The lower 32-bit limb of an address must not overflow when the small per-access
+/// offsets are added (ECSM spec address-alignment assumptions).
+const LOW_LIMB: u64 = 1 << 32;
+
 impl TryFrom<u64> for SyscallNumbers {
     type Error = ();
     fn try_from(value: u64) -> Result<Self, Self::Error> {
@@ -31,11 +44,37 @@ impl TryFrom<u64> for SyscallNumbers {
             64 => Ok(SyscallNumbers::Commit),
             93 => Ok(SyscallNumbers::Halt),
             v if v == KECCAK_SYSCALL_NUMBER => Ok(SyscallNumbers::KeccakPermute),
+            v if v == ECSM_SYSCALL_NUMBER => Ok(SyscallNumbers::Ecsm),
             _ => Err(()),
         }
     }
 }
 
+/// Reads a 256-bit little-endian value as four doublewords at `addr + 8i`.
+fn load_u256_le(memory: &Memory, addr: u64) -> Result<[u8; 32], MemoryError> {
+    let mut out = [0u8; 32];
+    for i in 0..4 {
+        let dw = memory.load_doubleword(addr + (i as u64) * 8)?;
+        out[i * 8..i * 8 + 8].copy_from_slice(&dw.to_le_bytes());
+    }
+    Ok(out)
+}
+
+/// Writes a 256-bit little-endian value as four doublewords at `addr + 8i`.
+fn store_u256_le(memory: &mut Memory, addr: u64, bytes: &[u8; 32]) -> Result<(), MemoryError> {
+    for i in 0..4 {
+        let mut dw = [0u8; 8];
+        dw.copy_from_slice(&bytes[i * 8..i * 8 + 8]);
+        memory.store_doubleword(addr + (i as u64) * 8, u64::from_le_bytes(dw))?;
+    }
+    Ok(())
+}
+
+/// Checks the ECSM address-alignment assumption: `(addr mod 2^32) + max_offset < 2^32`.
+fn ecsm_addr_ok(addr: u64, max_offset: u64) -> bool {
+    (addr % LOW_LIMB) + max_offset < LOW_LIMB
+}
+
 impl Instruction {
     /// Runs the given instruction and returns its execution log
     pub fn run(
@@ -359,6 +398,36 @@ impl Instruction {
                         }
                         src2_val = state_addr;
                     }
+                    SyscallNumbers::Ecsm => {
+                        // ECSM(-3): k×G on secp256k1.
+                        // x10 = addr to write xR, x11 = addr of xG, x12 = addr of k.
+                        // xG, k, xR are 32-byte little-endian values.
+                        let addr_xr = registers.read(10)?;
+                        let addr_xg = registers.read(11)?;
+                        let addr_k = registers.read(12)?;
+                        if !ecsm_addr_ok(addr_xg, 24)
+                            || !ecsm_addr_ok(addr_xr, 24)
+                            || !ecsm_addr_ok(addr_k, 31)
+                        {
+                            return Err(ExecutionError::EcsmAddressOverflow);
+                        }
+                        // xG and k are both read at the same proof timestamp, so their
+                        // 32-byte ranges must be disjoint or the trace is unprovable
+                        // (MEMW orders accesses per address by strictly increasing
+                        // timestamp). xR may alias either: its accesses are offset to
+                        // later timestamps.
+                        if addr_xg.abs_diff(addr_k) < 32 {
+                            return Err(ExecutionError::EcsmOperandOverlap);
+                        }
+                        let xg = load_u256_le(memory, addr_xg)?;
+                        let k = load_u256_le(memory, addr_k)?;
+                        let xr = ecsm::scalar_mul_x(&k, &xg)?;
+                        store_u256_le(memory, addr_xr, &xr)?;
+                        // Carry the input addresses for the prover; addr_xR = x10 is recovered
+                        // from the register state.
+                        src2_val = addr_xg;
+                        dst_val = addr_k;
+                    }
                     SyscallNumbers::Halt => {
                         // halt
                         return Ok(Log {
@@ -535,6 +604,12 @@ pub enum ExecutionError {
     UnalignedKeccakStateAddress(u64),
     #[error("Keccak state address range overflows: {0:#018x}")]
     KeccakStateAddressOverflow(u64),
+    #[error("ECSM address range overflows the lower 32-bit limb")]
+    EcsmAddressOverflow,
+    #[error("ECSM xG and k operand ranges overlap")]
+    EcsmOperandOverlap,
+    #[error("ECSM scalar multiplication error: {0}")]
+    Ecsm(#[from] ecsm::EcsmError),
 }
 
 // =============================================================================
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index 90c723732..da9ceb9af 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -18,6 +18,7 @@ stark = { path = "../crypto/stark" }
 crypto = { path = "../crypto/crypto" }
 math = { path = "../crypto/math" }
 executor = { path = "../executor" }
+ecsm = { path = "../crypto/ecsm" }
 serde = { version = "1.0", features = ["derive"] }
 rayon = { version = "1.8.0", optional = true }
 sysinfo = { version = "0.31", default-features = false, features = ["system"] }
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
index e11c539b5..81233d39f 100644
--- a/prover/src/lib.rs
+++ b/prover/src/lib.rs
@@ -49,11 +49,11 @@ use crate::tables::trace_builder::count_table_lengths;
 use crate::tables::types::BusId;
 use crate::test_utils::{
     E, F, VmAir, create_bitwise_air, create_branch_air, create_bytewise_air, create_commit_air,
-    create_cpu_air, create_cpu32_air, create_decode_air, create_dvrm_air, create_eq_air,
-    create_halt_air, create_keccak_air, create_keccak_rc_air, create_keccak_rnd_air,
-    create_load_air, create_lt_air, create_memw_air, create_memw_aligned_air,
-    create_memw_register_air, create_mul_air, create_page_air, create_register_air,
-    create_shift_air, create_store_air,
+    create_cpu_air, create_cpu32_air, create_decode_air, create_dvrm_air, create_ec_scalar_air,
+    create_ecdas_air, create_ecsm_air, create_eq_air, create_halt_air, create_keccak_air,
+    create_keccak_rc_air, create_keccak_rnd_air, create_load_air, create_lt_air, create_memw_air,
+    create_memw_aligned_air, create_memw_register_air, create_mul_air, create_page_air,
+    create_register_air, create_shift_air, create_store_air,
 };
 
 use stark::proof::options::{GoldilocksCubicProofOptions, ProofOptions};
@@ -71,6 +71,11 @@ pub struct RuntimePageRange {
     pub count: u64,
 }
 
+/// Number of tables that always contribute exactly one sub-proof, regardless
+/// of `TableCounts`: bitwise, decode, halt, commit, keccak, keccak_rnd,
+/// keccak_rc, register, ecsm, ec_scalar, ecdas.
+pub const FIXED_TABLE_COUNT: usize = 11;
+
 /// Number of chunks for each split table.
 /// The verifier needs this to reconstruct matching AIRs.
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
@@ -223,6 +228,9 @@ pub(crate) struct VmAirs {
     pub keccak: VmAir,
     pub keccak_rnd: VmAir,
     pub keccak_rc: VmAir,
+    pub ecsm: VmAir,
+    pub ec_scalar: VmAir,
+    pub ecdas: VmAir,
     pub register: VmAir,
     pub pages: Vec<VmAir>,
     pub memw_registers: Vec<VmAir>,
@@ -244,6 +252,9 @@ impl VmAirs {
             (&self.keccak, &mut traces.keccak, &()),
             (&self.keccak_rnd, &mut traces.keccak_rnd, &()),
             (&self.keccak_rc, &mut traces.keccak_rc, &()),
+            (&self.ecsm, &mut traces.ecsm, &()),
+            (&self.ec_scalar, &mut traces.ec_scalar, &()),
+            (&self.ecdas, &mut traces.ecdas, &()),
             (&self.register, &mut traces.register, &()),
         ];
 
@@ -314,6 +325,9 @@ impl VmAirs {
             &self.keccak,
             &self.keccak_rnd,
             &self.keccak_rc,
+            &self.ecsm,
+            &self.ec_scalar,
+            &self.ecdas,
             &self.register,
         ];
 
@@ -454,6 +468,9 @@ impl VmAirs {
             tables::keccak_rc::preprocessed_commitment(proof_options),
             tables::keccak_rc::NUM_PRECOMPUTED_COLS,
         );
+        let ecsm = create_ecsm_air(proof_options);
+        let ec_scalar = create_ec_scalar_air(proof_options);
+        let ecdas = create_ecdas_air(proof_options);
         let register = create_register_air(proof_options).with_preprocessed(
             register::preprocessed_commitment(proof_options, elf.entry_point),
             register::NUM_PREPROCESSED_COLS,
@@ -530,6 +547,9 @@ impl VmAirs {
             keccak,
             keccak_rnd,
             keccak_rc,
+            ecsm,
+            ec_scalar,
+            ecdas,
             register,
             pages,
             memw_registers,
@@ -890,11 +910,12 @@ pub fn verify_with_options(
     );
 
     // Cross-check: table_counts must match the number of sub-proofs.
-    // Fixed tables (bitwise, decode, halt, commit, keccak, keccak_rnd, keccak_rc, register) = 8, plus page tables.
-    let expected_proof_count = vm_proof.table_counts.total() + 8 + page_configs.len();
+    // FIXED_TABLE_COUNT always-present tables, plus page tables.
+    let expected_proof_count =
+        vm_proof.table_counts.total() + FIXED_TABLE_COUNT + page_configs.len();
     if expected_proof_count != vm_proof.proof.proofs.len() {
         return Err(Error::InvalidTableCounts(format!(
-            "table_counts total ({}) + 8 fixed + {} pages = {}, but proof contains {} sub-proofs",
+            "table_counts total ({}) + {FIXED_TABLE_COUNT} fixed + {} pages = {}, but proof contains {} sub-proofs",
             vm_proof.table_counts.total(),
             page_configs.len(),
             expected_proof_count,
diff --git a/prover/src/tables/cpu.rs b/prover/src/tables/cpu.rs
index ea5fc94dc..450595ec9 100644
--- a/prover/src/tables/cpu.rs
+++ b/prover/src/tables/cpu.rs
@@ -185,6 +185,9 @@ pub struct CpuOperation {
     pub ecall_keccak: bool,
     /// For KeccakPermute ECALLs: state address from x10.
     pub keccak_state_addr: u64,
+
+    /// Whether this ECALL is an ECSM (elliptic-curve scalar multiply) syscall
+    pub ecall_ecsm: bool,
 }
 
 impl CpuOperation {
@@ -228,6 +231,10 @@ impl CpuOperation {
         let ecall_keccak =
             f.ecall && log.src1_val == executor::vm::instruction::execution::KECCAK_SYSCALL_NUMBER;
         let keccak_state_addr = if ecall_keccak { log.src2_val } else { 0 };
+        // The ECSM operand addresses (x10/x11/x12) are recovered from the register state
+        // in the trace builder.
+        let ecall_ecsm =
+            f.ecall && log.src1_val == executor::vm::instruction::execution::ECSM_SYSCALL_NUMBER;
 
         // Word instructions are fully handled by CPU32; the main CPU row is a
         // delegate that only advances the PC and sends the CPU32 lookup. We still
@@ -345,6 +352,7 @@ impl CpuOperation {
             commit_count,
             ecall_keccak,
             keccak_state_addr,
+            ecall_ecsm,
         }
     }
 
diff --git a/prover/src/tables/ec_scalar.rs b/prover/src/tables/ec_scalar.rs
new file mode 100644
index 000000000..66c574116
--- /dev/null
+++ b/prover/src/tables/ec_scalar.rs
@@ -0,0 +1,374 @@
+//! EC_SCALAR chip — serves the scalar `k` bit-by-bit to the ECDAS chip.
+//!
+//! One row per scalar byte (32 rows per ECSM ecall, `offset` counting down 31→0). Each row
+//! receives a `ServeK[timestamp, ptr, offset]` token, reads byte `k[offset]` from memory,
+//! decomposes it into 8 bits, and sends one `Bit[timestamp, 8*offset + i]` token per set bit
+//! (the multiplicity is the bit itself). Unless `last_limb` (offset 0) it recurses by sending
+//! `ServeK[timestamp, ptr, offset-1]` — a self-referential bus, like COMMIT's `CommitNextByte`.
+//!
+//! ## Columns (15 total)
+//! - `timestamp`: DWordWL (2) — the ECALL timestamp
+//! - `ptr`: DWordWL (2) — address of `k` (= `addr_k`)
+//! - `offset`: Byte (1) — index of the scalar byte served by this row
+//! - `limb_bits`: Bit[8] (8) — bit decomposition of `k[offset]`
+//! - `last_limb`: Bit (1) — whether `offset == 0` (terminates the recursion)
+//! - `mu`: Bit (1) — multiplicity (1 for real rows, 0 for padding)
+//!
+//! `limb = Σ 2^i · limb_bits[i]` is virtual (a linear combination, never stored).
+
+use math::field::element::FieldElement;
+use math::field::traits::{IsField, IsSubFieldOf};
+use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator};
+use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing};
+use stark::table::TableView;
+use stark::trace::TraceTable;
+
+use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField};
+use crate::constraints::templates::new_is_bit_constraints;
+
+// =========================================================================
+// Column indices
+// =========================================================================
+
+pub mod cols {
+    pub const TIMESTAMP_0: usize = 0;
+    pub const TIMESTAMP_1: usize = 1;
+    pub const PTR_0: usize = 2;
+    pub const PTR_1: usize = 3;
+    pub const OFFSET: usize = 4;
+    /// limb_bits[0..8]
+    pub const LIMB_BITS: usize = 5;
+    pub const LAST_LIMB: usize = 13;
+    pub const MU: usize = 14;
+
+    pub const NUM_COLUMNS: usize = 15;
+
+    #[inline]
+    pub const fn limb_bit(i: usize) -> usize {
+        LIMB_BITS + i
+    }
+}
+
+// =========================================================================
+// Operation struct
+// =========================================================================
+
+/// One EC_SCALAR row: serving byte `offset` of the scalar at `ptr`.
+#[derive(Debug, Clone)]
+pub struct EcScalarOperation {
+    pub timestamp: u64,
+    pub ptr: u64,
+    pub offset: u8,
+    pub limb: u8,
+    pub last_limb: bool,
+}
+
+/// Expands a scalar `k` (little-endian bytes) and its ECALL timestamp / address into the
+/// 32 EC_SCALAR rows (offsets 31 down to 0).
+pub fn rows_for_scalar(timestamp: u64, addr_k: u64, k: &[u8; 32]) -> Vec<EcScalarOperation> {
+    (0..32)
+        .rev()
+        .map(|offset| EcScalarOperation {
+            timestamp,
+            ptr: addr_k,
+            offset: offset as u8,
+            limb: k[offset],
+            last_limb: offset == 0,
+        })
+        .collect()
+}
+
+// =========================================================================
+// Trace generation
+// =========================================================================
+
+pub fn generate_ec_scalar_trace(
+    ops: &[EcScalarOperation],
+) -> TraceTable<GoldilocksField, GoldilocksExtension> {
+    let n = ops.len();
+    let num_rows = n.next_power_of_two().max(4);
+    let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS];
+
+    for (row_idx, op) in ops.iter().enumerate() {
+        let base = row_idx * cols::NUM_COLUMNS;
+        data[base + cols::TIMESTAMP_0] = FE::from(op.timestamp & 0xFFFF_FFFF);
+        data[base + cols::TIMESTAMP_1] = FE::from(op.timestamp >> 32);
+        data[base + cols::PTR_0] = FE::from(op.ptr & 0xFFFF_FFFF);
+        data[base + cols::PTR_1] = FE::from(op.ptr >> 32);
+        data[base + cols::OFFSET] = FE::from(op.offset as u64);
+        for i in 0..8 {
+            data[base + cols::limb_bit(i)] = FE::from(((op.limb >> i) & 1) as u64);
+        }
+        data[base + cols::LAST_LIMB] = FE::from(op.last_limb as u64);
+        data[base + cols::MU] = FE::one();
+    }
+
+    // Padding rows keep every field 0: all IS_BIT constraints hold (0 is a bit) and every
+    // implication constraint (a·b = 0 or a·(1 - b) = 0) holds because its factor `a` is 0.
+    TraceTable::new_main(data, cols::NUM_COLUMNS, 1)
+}
+
+// =========================================================================
+// Bus interactions
+// =========================================================================
+
+/// `limb = Σ 2^i · limb_bits[i]` as a single bus element (used as the byte value in MEMW).
+fn limb_value() -> BusValue {
+    BusValue::linear(
+        (0..8)
+            .map(|i| LinearTerm::Column {
+                coefficient: 1i64 << i,
+                column: cols::limb_bit(i),
+            })
+            .collect(),
+    )
+}
+
+pub fn bus_interactions() -> Vec<BusInteraction> {
+    let ts = || {
+        [
+            BusValue::Packed {
+                start_column: cols::TIMESTAMP_0,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: cols::TIMESTAMP_1,
+                packing: Packing::Direct,
+            },
+        ]
+    };
+    let ptr = || {
+        [
+            BusValue::Packed {
+                start_column: cols::PTR_0,
+                packing: Packing::Direct,
+            },
+            BusValue::Packed {
+                start_column: cols::PTR_1,
+                packing: Packing::Direct,
+            },
+        ]
+    };
+
+    let mut interactions = Vec::with_capacity(11);
+
+    // 1. Receive ServeK[timestamp, ptr, offset] (mult = mu).
+    {
+        let [t0, t1] = ts();
+        let [p0, p1] = ptr();
+        interactions.push(BusInteraction::receiver(
+            BusId::ServeK,
+            Multiplicity::Column(cols::MU),
+            vec![
+                t0,
+                t1,
+                p0,
+                p1,
+                BusValue::Packed {
+                    start_column: cols::OFFSET,
+                    packing: Packing::Direct,
+                },
+            ],
+        ));
+    }
+
+    // 2. MEMW: read byte k[offset] at ptr+offset, timestamp+1, width 1 (mult = mu).
+    // CO24 layout: [old[8], is_register, base[2], value[8], ts[2], w2, w4, w8].
+    {
+        let base_lo = BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::PTR_0,
+            },
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::OFFSET,
+            },
+        ]);
+        let base_hi = BusValue::Packed {
+            start_column: cols::PTR_1,
+            packing: Packing::Direct,
+        };
+        let ts_lo_plus_1 = BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::TIMESTAMP_0,
+            },
+            LinearTerm::Constant(1),
+        ]);
+        let ts_hi = BusValue::Packed {
+            start_column: cols::TIMESTAMP_1,
+            packing: Packing::Direct,
+        };
+        let mut values = Vec::with_capacity(24);
+        // old[0..8]: read value = limb, rest 0
+        values.push(limb_value());
+        for _ in 1..8 {
+            values.push(BusValue::constant(0));
+        }
+        values.push(BusValue::constant(0)); // is_register = 0
+        values.push(base_lo);
+        values.push(base_hi);
+        // value[0..8]: same as old (read)
+        values.push(limb_value());
+        for _ in 1..8 {
+            values.push(BusValue::constant(0));
+        }
+        values.push(ts_lo_plus_1);
+        values.push(ts_hi);
+        values.push(BusValue::constant(0)); // w2
+        values.push(BusValue::constant(0)); // w4
+        values.push(BusValue::constant(0)); // w8 (width 1 byte)
+        interactions.push(BusInteraction::sender(
+            BusId::Memw,
+            Multiplicity::Column(cols::MU),
+            values,
+        ));
+    }
+
+    // 3. Send Bit[timestamp, 8*offset + i] for each set bit (mult = limb_bits[i]).
+    for i in 0..8 {
+        let [t0, t1] = ts();
+        interactions.push(BusInteraction::sender(
+            BusId::Bit,
+            Multiplicity::Column(cols::limb_bit(i)),
+            vec![
+                t0,
+                t1,
+                BusValue::linear(vec![
+                    LinearTerm::Column {
+                        coefficient: 8,
+                        column: cols::OFFSET,
+                    },
+                    LinearTerm::Constant(i as i64),
+                ]),
+            ],
+        ));
+    }
+
+    // 4. Recurse: send ServeK[timestamp, ptr, offset-1] (mult = mu - last_limb).
+    {
+        let [t0, t1] = ts();
+        let [p0, p1] = ptr();
+        interactions.push(BusInteraction::sender(
+            BusId::ServeK,
+            Multiplicity::Diff(cols::MU, cols::LAST_LIMB),
+            vec![
+                t0,
+                t1,
+                p0,
+                p1,
+                BusValue::linear(vec![
+                    LinearTerm::Column {
+                        coefficient: 1,
+                        column: cols::OFFSET,
+                    },
+                    LinearTerm::Constant(-1),
+                ]),
+            ],
+        ));
+    }
+
+    interactions
+}
+
+// =========================================================================
+// Constraints
+// =========================================================================
+
+/// `a · b = 0` or `a · (1 - b) = 0` (degree 2), used for the spec's implication
+/// constraints (`limb_bits_i = 1 ⇒ μ = 1`, `last_limb = 1 ⇒ μ = 1`, `last_limb = 1 ⇒ offset = 0`).
+pub struct MulZeroConstraint {
+    pub a: usize,
+    pub b: usize,
+    /// when true, the second factor is `(1 - b)` instead of `b`
+    pub b_complement: bool,
+    pub constraint_idx: usize,
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for MulZeroConstraint {
+    fn degree(&self) -> usize {
+        2
+    }
+
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let a = step.get_main_evaluation_element(0, self.a).clone();
+        let b = step.get_main_evaluation_element(0, self.b).clone();
+        if self.b_complement {
+            a * (FieldElement::<F>::one() - b)
+        } else {
+            a * b
+        }
+    }
+}
+
+/// Creates all EC_SCALAR transition constraints (20 total).
+pub fn create_constraints(
+    constraint_idx_start: usize,
+) -> (
+    Vec<Box<dyn TransitionConstraintEvaluator<GoldilocksField, GoldilocksExtension>>>,
+    usize,
+) {
+    let mut constraints: Vec<
+        Box<dyn TransitionConstraintEvaluator<GoldilocksField, GoldilocksExtension>>,
+    > = Vec::with_capacity(20);
+    let mut idx = constraint_idx_start;
+
+    // IS_BIT for mu, limb_bits[0..8], last_limb.
+    let mut bit_cols = vec![cols::MU];
+    bit_cols.extend((0..8).map(cols::limb_bit));
+    bit_cols.push(cols::LAST_LIMB);
+    let (bit_constraints, next) = new_is_bit_constraints(&bit_cols, idx);
+    for c in bit_constraints {
+        constraints.push(c.boxed());
+    }
+    idx = next;
+
+    // limb_bits[i] = 1 ⇒ mu = 1  :  limb_bits[i] · (1 - mu) = 0
+    for i in 0..8 {
+        constraints.push(
+            MulZeroConstraint {
+                a: cols::limb_bit(i),
+                b: cols::MU,
+                b_complement: true,
+                constraint_idx: idx,
+            }
+            .boxed(),
+        );
+        idx += 1;
+    }
+
+    // last_limb = 1 ⇒ mu = 1  :  last_limb · (1 - mu) = 0
+    constraints.push(
+        MulZeroConstraint {
+            a: cols::LAST_LIMB,
+            b: cols::MU,
+            b_complement: true,
+            constraint_idx: idx,
+        }
+        .boxed(),
+    );
+    idx += 1;
+
+    // last_limb = 1 ⇒ offset = 0  :  last_limb · offset = 0
+    constraints.push(
+        MulZeroConstraint {
+            a: cols::LAST_LIMB,
+            b: cols::OFFSET,
+            b_complement: false,
+            constraint_idx: idx,
+        }
+        .boxed(),
+    );
+    idx += 1;
+
+    (constraints, idx)
+}
diff --git a/prover/src/tables/ecdas.rs b/prover/src/tables/ecdas.rs
new file mode 100644
index 000000000..48183b810
--- /dev/null
+++ b/prover/src/tables/ecdas.rs
@@ -0,0 +1,518 @@
+//! ECDAS chip — one double/add step of the scalar-multiplication sequence.
+//!
+//! Each row receives an accumulator `(A, G, round, op)` on the self-referential `Ecdas`
+//! bus, computes `R = 2A` (op=0) or `R = A + G` (op=1) via three byte-limb convolution
+//! relations (`λ`, `xR`, `yR`, each with a 33-byte quotient + 64-entry carry array and the
+//! offset `r = 3p`), and sends the updated accumulator back with `round − (1 − next_op)`
+//! and `next_op`. When `next_op = 1` it consumes the scalar bit at `round` on the `Bit`
+//! bus (an add follows). ECSM seeds and drains the bus; interior rows telescope.
+//!
+//! See `spec/ecdas.toml`. Constraints are **unconditional**; padding rows set the quotients
+//! to `r` and `op = 0`, which makes every relation hold with zero carries.
+
+use math::field::element::FieldElement;
+use math::field::traits::{IsField, IsSubFieldOf};
+use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator};
+use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing};
+use stark::table::TableView;
+use stark::trace::TraceTable;
+
+use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField};
+use crate::constraints::templates::IsBitConstraint;
+use crate::tables::ecsm::ecdas_tuple;
+use ecsm::{EcdasStep, P_BYTES};
+
+/// `r = 3·p` as 33 little-endian bytes (the spec offset that keeps all quotients positive).
+pub const R_BYTES: [u8; 33] = [
+    0x8D, 0xF4, 0xFF, 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+    0x02,
+];
+
+// =========================================================================
+// Column indices (~521 columns)
+// =========================================================================
+
+pub mod cols {
+    pub const TIMESTAMP_0: usize = 0; // low 32 bits of the timestamp
+    pub const TIMESTAMP_1: usize = 1; // high 32 bits of the timestamp
+    pub const XG: usize = 2; // U256BL (32)
+    pub const YG: usize = 34; // U256BL (32)
+    pub const XA: usize = 66; // U256BL (32)
+    pub const YA: usize = 98; // U256BL (32)
+    pub const ROUND: usize = 130; // Byte
+    pub const OP: usize = 131; // Bit (guaranteed via the Ecdas bus, not by a local IS_BIT)
+    pub const XR: usize = 132; // U256BL (32)
+    pub const YR: usize = 164; // U256BL (32)
+    pub const LAMBDA: usize = 196; // U256BL (32)
+    pub const Q0: usize = 228; // Byte[33] — quotient of the λ relation
+    pub const C0: usize = 261; // BaseField[64] — carries of the λ relation
+    pub const Q1: usize = 325; // Byte[33] — quotient of the xR relation
+    pub const C1: usize = 358; // BaseField[64] — carries of the xR relation
+    pub const Q2: usize = 422; // Byte[33] — quotient of the yR relation
+    pub const C2: usize = 455; // BaseField[64] — carries of the yR relation
+    pub const NEXT_OP: usize = 519; // Bit
+    pub const MU: usize = 520; // Bit: 1 on real rows, 0 on padding rows
+
+    pub const NUM_COLUMNS: usize = 521;
+
+    #[inline]
+    pub const fn c0(i: usize) -> usize {
+        C0 + i
+    }
+    #[inline]
+    pub const fn c1(i: usize) -> usize {
+        C1 + i
+    }
+    #[inline]
+    pub const fn c2(i: usize) -> usize {
+        C2 + i
+    }
+}
+
+// =========================================================================
+// Operation struct
+// =========================================================================
+
+/// One ECDAS row: a double/add step witness plus its ECALL timestamp.
+#[derive(Debug, Clone)]
+pub struct EcdasOperation {
+    pub timestamp: u64, // stored as two 32-bit columns (TIMESTAMP_0 / TIMESTAMP_1)
+    pub step: EcdasStep, // witness copied column-by-column by `generate_ecdas_trace`
+}
+
+// =========================================================================
+// Trace generation
+// =========================================================================
+
+fn fe_from_i64(c: i64) -> FE {
+    if c >= 0 {
+        FE::from(c as u64)
+    } else {
+        FE::zero() - FE::from(c.unsigned_abs()) // `-c` would overflow for `i64::MIN`
+    }
+}
+
+fn write_bytes(data: &mut [FE], base: usize, col: usize, bytes: &[u8]) {
+    for (offset, byte) in bytes.iter().copied().enumerate() {
+        data[base + col + offset] = FE::from(u64::from(byte)); // one cell per byte
+    }
+}
+
+pub fn generate_ecdas_trace(
+    ops: &[EcdasOperation],
+) -> TraceTable<GoldilocksField, GoldilocksExtension> {
+    // One row per step, padded up to a power of two (and to at least 4 rows).
+    let num_rows = ops.len().next_power_of_two().max(4);
+    let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS];
+    let mut rows = data.chunks_exact_mut(cols::NUM_COLUMNS);
+
+    // Real rows: copy the step witness verbatim and mark the row active (`mu = 1`).
+    for (operation, row) in ops.iter().zip(rows.by_ref()) {
+        let step = &operation.step;
+        row[cols::TIMESTAMP_0] = FE::from(operation.timestamp & 0xFFFF_FFFF);
+        row[cols::TIMESTAMP_1] = FE::from(operation.timestamp >> 32);
+        row[cols::ROUND] = FE::from(step.round as u64);
+        row[cols::OP] = FE::from(step.op as u64);
+        row[cols::NEXT_OP] = FE::from(step.next_op as u64);
+        row[cols::MU] = FE::one();
+        write_bytes(row, 0, cols::XG, &step.x_g);
+        write_bytes(row, 0, cols::YG, &step.y_g);
+        write_bytes(row, 0, cols::XA, &step.x_a);
+        write_bytes(row, 0, cols::YA, &step.y_a);
+        write_bytes(row, 0, cols::XR, &step.x_r);
+        write_bytes(row, 0, cols::YR, &step.y_r);
+        write_bytes(row, 0, cols::LAMBDA, &step.lambda);
+        write_bytes(row, 0, cols::Q0, &step.q0);
+        write_bytes(row, 0, cols::Q1, &step.q1);
+        write_bytes(row, 0, cols::Q2, &step.q2);
+        // Signed carries of the three convolution relations.
+        for i in 0..64 {
+            row[cols::c0(i)] = fe_from_i64(step.c0[i]);
+            row[cols::c1(i)] = fe_from_i64(step.c1[i]);
+            row[cols::c2(i)] = fe_from_i64(step.c2[i]);
+        }
+    }
+
+    // Padding rows: q0 = q1 = q2 = r, everything else (op and mu included) stays 0, so
+    // every (unconditional) convolution relation holds with zero carries.
+    for row in rows {
+        for q_col in [cols::Q0, cols::Q1, cols::Q2] {
+            write_bytes(row, 0, q_col, &R_BYTES);
+        }
+    }
+
+    TraceTable::new_main(data, cols::NUM_COLUMNS, 1)
+}
+
+// =========================================================================
+// Bus interactions
+// =========================================================================
+
+fn packed(col: usize) -> BusValue {
+    BusValue::Packed {
+        start_column: col, // trace column the bus element is taken from
+        packing: Packing::Direct,
+    }
+}
+
+pub fn bus_interactions() -> Vec<BusInteraction> {
+    let mu = || Multiplicity::Column(cols::MU); // 1 on real rows, 0 on padding rows
+    let ts_lo = || packed(cols::TIMESTAMP_0);
+    let ts_hi = || packed(cols::TIMESTAMP_1);
+    let mut out = Vec::new();
+
+    // Receive [ts, xA, yA, xG, yG, round, op].
+    out.push(BusInteraction::receiver(
+        BusId::Ecdas,
+        mu(),
+        ecdas_tuple(
+            cols::XA,
+            cols::YA,
+            cols::XG,
+            cols::YG,
+            packed(cols::ROUND),
+            packed(cols::OP),
+            ts_lo(),
+            ts_hi(),
+        ),
+    ));
+
+    // IS_BYTE range checks (single byte → AreBytes[x, 0]).
+    let is_byte = |col: usize, len: usize, out: &mut Vec<BusInteraction>| {
+        for i in 0..len {
+            out.push(BusInteraction::sender(
+                BusId::AreBytes,
+                Multiplicity::Column(cols::MU),
+                vec![packed(col + i), BusValue::constant(0)],
+            ));
+        }
+    };
+    is_byte(cols::ROUND, 1, &mut out);
+    is_byte(cols::LAMBDA, 32, &mut out);
+    is_byte(cols::Q0, 33, &mut out);
+    is_byte(cols::XR, 32, &mut out);
+    is_byte(cols::Q1, 33, &mut out);
+    is_byte(cols::YR, 32, &mut out);
+    is_byte(cols::Q2, 33, &mut out);
+
+    // IS_HALF range checks on the carries (offsets keep them in [0, 2^16)).
+    let half = |col: usize, off: i64| {
+        BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: col,
+            },
+            LinearTerm::Constant(off),
+        ])
+    };
+    for (base, off) in [(cols::C0, 32636i64), (cols::C1, 8161), (cols::C2, 16320)] {
+        for i in 0..63 { // c_63 is pinned to 0 in `create_constraints`, so only c_0..c_62 are checked
+            out.push(BusInteraction::sender(
+                BusId::IsHalfword,
+                mu(),
+                vec![half(base + i, off)],
+            ));
+        }
+    }
+
+    // Receive Bit[ts, round] when adding next (mult = next_op).
+    out.push(BusInteraction::receiver(
+        BusId::Bit,
+        Multiplicity::Column(cols::NEXT_OP),
+        vec![ts_lo(), ts_hi(), packed(cols::ROUND)],
+    ));
+
+    // Send the updated accumulator: [ts, xR, yR, xG, yG, round - 1 + next_op, next_op].
+    out.push(BusInteraction::sender(
+        BusId::Ecdas,
+        mu(),
+        ecdas_tuple(
+            cols::XR,
+            cols::YR,
+            cols::XG,
+            cols::YG,
+            BusValue::linear(vec![
+                LinearTerm::Column {
+                    coefficient: 1,
+                    column: cols::ROUND,
+                },
+                LinearTerm::Column {
+                    coefficient: 1,
+                    column: cols::NEXT_OP,
+                },
+                LinearTerm::Constant(-1),
+            ]),
+            packed(cols::NEXT_OP),
+            ts_lo(),
+            ts_hi(),
+        ),
+    ));
+
+    out
+}
+
+// =========================================================================
+// Constraints
+// =========================================================================
+
+fn p_byte<F: IsField>(m: usize) -> FieldElement<F> {
+    // Limb `m` of `P_BYTES` as a field constant. Indices at 32 and above read as zero,
+    // which lets the convolution sums index past the stored bytes without a range
+    // test at every call site.
+    let limb = if m < 32 { P_BYTES[m] } else { 0 };
+    FieldElement::from(limb as u64)
+}
+
+fn r_byte<F: IsField>(m: usize) -> FieldElement<F> {
+    // Limb `m` of `R_BYTES` as a field constant. Indices at 33 and above read as zero,
+    // which lets the convolution sums index past the stored bytes without a range
+    // test at every call site.
+    let limb = if m < 33 { R_BYTES[m] } else { 0 };
+    FieldElement::from(limb as u64)
+}
+
+#[derive(Clone, Copy)]
+pub enum Relation {
+    Lambda, // slope relation: quotient `Q0`, carries `C0`
+    Xr, // xR relation: quotient `Q1`, carries `C1`
+    Yr, // yR relation: quotient `Q2`, carries `C2`
+}
+
+/// Unconditional convolution carry constraint at limb `i`: `2^8·c_i − c_{i-1} − S_i = 0`.
+pub struct ConvCarry {
+    pub relation: Relation, // which of the λ / xR / yR relations `S_i` is taken from
+    pub i: usize, // limb index; `create_constraints` instantiates 0..64
+    pub constraint_idx: usize,
+}
+
+impl ConvCarry {
+    fn s_i<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let i = self.i; // limb position of this convolution sum
+        let col = |c: usize| -> FieldElement<F> { step.get_main_evaluation_element(0, c).clone() };
+        // Byte `j` of the `len`-byte value stored at `base` (zero beyond the stored length).
+        let b = |base: usize, len: usize, j: usize| -> FieldElement<F> {
+            if j < len {
+                col(base + j)
+            } else {
+                FieldElement::zero()
+            }
+        };
+        let lam = |j: usize| b(cols::LAMBDA, 32, j);
+        let xg = |j: usize| b(cols::XG, 32, j);
+        let xa = |j: usize| b(cols::XA, 32, j);
+        let ya = |j: usize| b(cols::YA, 32, j);
+        let yg = |j: usize| b(cols::YG, 32, j);
+        let xr = |j: usize| b(cols::XR, 32, j);
+        let yr = |j: usize| b(cols::YR, 32, j);
+        let op = col(cols::OP); // 1 selects the add formula, 0 the double formula
+        let one = FieldElement::<F>::one();
+
+        // r·P − q·P convolution (shared structure across all three relations).
+        let rq = |qbase: usize| -> FieldElement<F> {
+            let mut s = FieldElement::<F>::zero();
+            for j in 0..=i {
+                s += (r_byte::<F>(j) - b(qbase, 33, j)) * p_byte::<F>(i - j);
+            }
+            s
+        };
+
+        match self.relation {
+            Relation::Lambda => {
+                // op·(Σ λ_j(xG-xA)_{i-j} + (yA_i - yG_i))
+                let mut op_branch = ya(i) - yg(i);
+                for j in 0..=i {
+                    op_branch += lam(j) * (xg(i - j) - xa(i - j));
+                }
+                // (1-op)·Σ (2 λ_j yA_{i-j} - 3 xA_j xA_{i-j})
+                let mut notop_branch = FieldElement::<F>::zero();
+                for j in 0..=i {
+                    notop_branch = notop_branch
+                        + FieldElement::<F>::from(2u64) * lam(j) * ya(i - j)
+                        - FieldElement::<F>::from(3u64) * xa(j) * xa(i - j);
+                }
+                op.clone() * op_branch + (one - op) * notop_branch + rq(cols::Q0)
+            }
+            Relation::Xr => {
+                // Σ λ_j λ_{i-j} − xA_i − xG_i − xR_i − (1-op)(xA_i − xG_i) + rq
+                let mut s = FieldElement::<F>::zero();
+                for j in 0..=i {
+                    s += lam(j) * lam(i - j);
+                }
+                s - xa(i) - xg(i) - xr(i) - (one - op) * (xa(i) - xg(i)) + rq(cols::Q1)
+            }
+            Relation::Yr => {
+                // Σ λ_j(xA-xR)_{i-j} − yA_i − yR_i + rq
+                let mut s = FieldElement::<F>::zero();
+                for j in 0..=i {
+                    s += lam(j) * (xa(i - j) - xr(i - j));
+                }
+                s - ya(i) - yr(i) + rq(cols::Q2)
+            }
+        }
+    }
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for ConvCarry {
+    fn degree(&self) -> usize {
+        match self.relation {
+            Relation::Xr | Relation::Yr => 2,
+            Relation::Lambda => 3, // op · (λ · Δx)
+        }
+    }
+
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        // Each relation owns its own 64-entry carry array.
+        let carries = match self.relation {
+            Relation::Lambda => cols::C0,
+            Relation::Xr => cols::C1,
+            Relation::Yr => cols::C2,
+        };
+        let carry = |k: usize| step.get_main_evaluation_element(0, carries + k).clone();
+        // Limb 0 has no incoming carry.
+        let carry_in = match self.i.checked_sub(1) {
+            Some(prev) => carry(prev),
+            None => FieldElement::<F>::zero(),
+        };
+        FieldElement::<F>::from(256u64) * carry(self.i) - carry_in - self.s_i(step)
+    }
+}
+
+/// `col = 0` (unconditional, degree 1). Used for the closing `c_63 = 0`.
+pub struct ColIsZero {
+    pub col: usize, // main-trace column forced to zero on every row
+    pub constraint_idx: usize,
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for ColIsZero {
+    fn degree(&self) -> usize {
+        1 // the constraint polynomial is the column itself
+    }
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        step.get_main_evaluation_element(0, self.col).clone() // current-row value of `col`
+    }
+}
+
+/// `a · b = 0` or `a · (1 - b) = 0` (degree 2).
+pub struct MulZero {
+    pub a: usize, // column of the first factor
+    pub b: usize, // column of the second factor
+    pub b_complement: bool, // when set, the second factor is `1 - b` instead of `b`
+    pub constraint_idx: usize,
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for MulZero {
+    fn degree(&self) -> usize {
+        2 // product of two degree-1 factors
+    }
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let cell = |c: usize| step.get_main_evaluation_element(0, c).clone();
+        let second = if self.b_complement {
+            FieldElement::<F>::one() - cell(self.b)
+        } else {
+            cell(self.b)
+        };
+        cell(self.a) * second // a · b, or a · (1 − b) when complemented
+    }
+}
+
+/// Builds every ECDAS transition constraint (199 total).
+///
+/// Indices are assigned consecutively from `constraint_idx_start`; the returned `usize`
+/// is the first index left unused.
+///
+/// Emitted in order: IS_BIT on `mu` and `next_op`; `op · next_op = 0`;
+/// `next_op · (1 − mu) = 0`; then, for each of λ / xR / yR, 64 carry constraints
+/// followed by the closing `c_63 = 0`.
+pub fn create_constraints(
+    constraint_idx_start: usize,
+) -> (
+    Vec<Box<dyn TransitionConstraintEvaluator<GoldilocksField, GoldilocksExtension>>>,
+    usize,
+) {
+    let mut constraints: Vec<
+        Box<dyn TransitionConstraintEvaluator<GoldilocksField, GoldilocksExtension>>,
+    > = Vec::new();
+    let mut idx = constraint_idx_start;
+
+    // `op` needs no direct bit check: it is only ever the op field of an Ecdas bus token, and
+    // every producer of that token emits a bit there — ECSM seeds it with a constant 0, and
+    // each ECDAS step emits `next_op` (which is IS_BIT'd). The bus cannot be minted (IS_BIT(mu)
+    // blocks weight ≠ 1), so a row's received `op` is always in {0,1} and the λ/xR/yR selector
+    // `op·add + (1−op)·double` is well-defined.
+    for col in [cols::MU, cols::NEXT_OP] {
+        constraints.push(IsBitConstraint::unconditional(col, idx).boxed());
+        idx += 1;
+    }
+
+    // Selector products, listed as `(a, b, b_complement)`:
+    //   op · next_op = 0
+    //   next_op · (1 - mu) = 0
+    let selector_products = [
+        (cols::OP, cols::NEXT_OP, false),
+        (cols::NEXT_OP, cols::MU, true),
+    ];
+    for (a, b, b_complement) in selector_products {
+        let product = MulZero {
+            a,
+            b,
+            b_complement,
+            constraint_idx: idx,
+        };
+        constraints.push(product.boxed());
+        idx += 1;
+    }
+
+    // λ, xR, yR: 64 carry-chain constraints each, followed by the closing `c_63 = 0`.
+    let relations = [
+        (Relation::Lambda, cols::C0),
+        (Relation::Xr, cols::C1),
+        (Relation::Yr, cols::C2),
+    ];
+    for (relation, c_base) in relations {
+        for i in 0..64 {
+            let carry = ConvCarry {
+                relation,
+                i,
+                constraint_idx: idx + i,
+            };
+            constraints.push(carry.boxed());
+        }
+        idx += 64;
+
+        let closing = ColIsZero {
+            col: c_base + 63,
+            constraint_idx: idx,
+        };
+        constraints.push(closing.boxed());
+        idx += 1;
+    }
+
+    (constraints, idx)
+}
diff --git a/prover/src/tables/ecsm.rs b/prover/src/tables/ecsm.rs
new file mode 100644
index 000000000..2b4444656
--- /dev/null
+++ b/prover/src/tables/ecsm.rs
@@ -0,0 +1,939 @@
+//! ECSM core chip — orchestrates one secp256k1 scalar multiplication `k·G`.
+//!
+//! One row per `ECALL(-3)`. It reads `xG` and `k` from memory, witnesses `yG` and proves
+//! `yG² ≡ xG³ + b mod p` (via two byte-limb convolution relations with quotients `q0,q1`
+//! and 64-entry carry arrays `c0,c1`), enforces `0 < k < N` and `xR < p`, writes `xR` back,
+//! and delegates the double-and-add to ECDAS / EC_SCALAR over the `Ecdas`/`ServeK`/`Bit`
+//! buses.
+//!
+//! See `spec/ecsm.toml`. All multi-limb arithmetic uses 8-bit limbs; the witness is built
+//! by `ecsm::compute_witness`, which reproduces these exact recurrences.
+//!
+//! ## Padding
+//! Padding rows have `mu = 0`, all columns zero **except `q1`, which pads to `p`**. This makes
+//! both carry relations close on padding without gating the whole recurrence: the x² relation
+//! has no standalone constant (closes at all-zero), and the yG relation closes because the
+//! `p² − q1·p` offset cancels (`q1 = p`) and the curve constant `b` is multiplied by `µ` (so it
+//! drops when `µ = 0`). Only that single `µ·b` term is µ-gated. The range checks /
+//! virtual-carry checks remain µ-gated as before.
+
+use executor::vm::instruction::execution::ECSM_SYSCALL_NUMBER;
+use math::field::element::FieldElement;
+use math::field::traits::{IsField, IsSubFieldOf};
+use stark::constraints::transition::{TransitionConstraint, TransitionConstraintEvaluator};
+use stark::lookup::{BusInteraction, BusValue, LinearTerm, Multiplicity, Packing};
+use stark::table::TableView;
+use stark::trace::TraceTable;
+
+use super::types::{BusId, FE, GoldilocksExtension, GoldilocksField};
+use crate::constraints::templates::{INV_SHIFT_32, IsBitConstraint};
+use ecsm::{B, EcsmWitness, N_BYTES, P_BYTES};
+
+// =========================================================================
+// Column indices (~427 columns)
+// =========================================================================
+
+pub mod cols {
+    pub const TIMESTAMP_0: usize = 0; // low 32 bits of the timestamp
+    pub const TIMESTAMP_1: usize = 1; // high 32 bits of the timestamp
+    pub const ADDR_XG_0: usize = 2; // low 32 bits of the xG address
+    pub const ADDR_XG_1: usize = 3; // high 32 bits of the xG address
+    pub const ADDR_K_0: usize = 4; // low 32 bits of the k address
+    pub const ADDR_K_1: usize = 5; // high 32 bits of the k address
+    pub const ADDR_XR_0: usize = 6; // low 32 bits of the xR address
+    pub const ADDR_XR_1: usize = 7; // high 32 bits of the xR address
+
+    pub const XR: usize = 8; // U256BL (32)
+    pub const YR: usize = 40; // U256BL (32)
+    pub const K: usize = 72; // U256BL (32)
+    pub const LEN_K: usize = 104; // Byte
+    pub const XG: usize = 105; // U256BL (32)
+    pub const YG: usize = 137; // U256BL (32)
+    pub const X2: usize = 169; // U256BL (32)
+    pub const Q0: usize = 201; // U256BL (32)
+    pub const C0: usize = 233; // BaseField[64]
+    pub const Q1: usize = 297; // Byte[33]
+    pub const C1: usize = 330; // BaseField[64]
+    pub const K_SUB_N: usize = 394; // U256HL (16 halfwords)
+    pub const XR_SUB_P: usize = 410; // U256HL (16 halfwords)
+    pub const MU: usize = 426; // 1 on real rows, 0 on padding rows
+
+    pub const NUM_COLUMNS: usize = 427;
+
+    #[inline]
+    pub const fn xr(i: usize) -> usize {
+        XR + i
+    }
+    #[inline]
+    pub const fn k(i: usize) -> usize {
+        K + i
+    }
+    #[inline]
+    pub const fn xg(i: usize) -> usize {
+        XG + i
+    }
+    #[inline]
+    pub const fn yg(i: usize) -> usize {
+        YG + i
+    }
+    #[inline]
+    pub const fn x2(i: usize) -> usize {
+        X2 + i
+    }
+    #[inline]
+    pub const fn q0(i: usize) -> usize {
+        Q0 + i
+    }
+    #[inline]
+    pub const fn c0(i: usize) -> usize {
+        C0 + i
+    }
+    #[inline]
+    pub const fn q1(i: usize) -> usize {
+        Q1 + i
+    }
+    #[inline]
+    pub const fn c1(i: usize) -> usize {
+        C1 + i
+    }
+    #[inline]
+    pub const fn k_sub_n(i: usize) -> usize {
+        K_SUB_N + i
+    }
+    #[inline]
+    pub const fn xr_sub_p(i: usize) -> usize {
+        XR_SUB_P + i
+    }
+}
+
+// =========================================================================
+// Operation struct
+// =========================================================================
+
+/// One ECSM ecall: the math witness plus the three memory addresses and timestamp.
+#[derive(Debug, Clone)]
+pub struct EcsmOperation {
+    pub timestamp: u64,
+    pub addr_xg: u64, // address `xG` is read from
+    pub addr_k: u64, // address the scalar `k` is read from
+    pub addr_xr: u64, // address `xR` is written to
+    pub witness: EcsmWitness,
+}
+
+// =========================================================================
+// Trace generation
+// =========================================================================
+
+/// Converts a signed carry to a field element (negatives wrap to `p − |c|`).
+fn fe_from_i64(c: i64) -> FE {
+    if c >= 0 {
+        FE::from(c as u64)
+    } else {
+        FE::zero() - FE::from(c.unsigned_abs()) // `-c` would overflow for `i64::MIN`
+    }
+}
+
+fn write_dword_wl(data: &mut [FE], base: usize, lo_col: usize, value: u64) {
+    data[base + lo_col] = FE::from(value & 0xFFFF_FFFF); // low 32 bits
+    data[base + lo_col + 1] = FE::from(value >> 32); // high 32 bits, in the next column
+}
+
+fn write_bytes(data: &mut [FE], base: usize, col: usize, bytes: &[u8]) {
+    for (offset, byte) in bytes.iter().copied().enumerate() {
+        data[base + col + offset] = FE::from(u64::from(byte)); // one cell per byte
+    }
+}
+
+/// Packs 32 little-endian bytes into 16 halfword cells (U256HL) starting at `base + col`.
+fn write_halfwords(data: &mut [FE], base: usize, col: usize, bytes: &[u8; 32]) {
+    for (j, pair) in bytes.chunks_exact(2).enumerate() {
+        let hw = u16::from_le_bytes([pair[0], pair[1]]);
+        data[base + col + j] = FE::from(u64::from(hw));
+    }
+}
+
+pub fn generate_ecsm_trace(
+    ops: &[EcsmOperation],
+) -> TraceTable<GoldilocksField, GoldilocksExtension> {
+    let num_rows = ops.len().next_power_of_two().max(4);
+    let mut data = vec![FE::zero(); num_rows * cols::NUM_COLUMNS];
+    let mut rows = data.chunks_exact_mut(cols::NUM_COLUMNS);
+
+    // Real rows: one per ecall, filled from its witness and marked active (`mu = 1`).
+    for (operation, row) in ops.iter().zip(rows.by_ref()) {
+        let witness = &operation.witness;
+
+        // Timestamp and the three addresses, each split into two 32-bit words.
+        write_dword_wl(row, 0, cols::TIMESTAMP_0, operation.timestamp);
+        write_dword_wl(row, 0, cols::ADDR_XG_0, operation.addr_xg);
+        write_dword_wl(row, 0, cols::ADDR_K_0, operation.addr_k);
+        write_dword_wl(row, 0, cols::ADDR_XR_0, operation.addr_xr);
+
+        write_bytes(row, 0, cols::XR, &witness.x_r);
+        write_bytes(row, 0, cols::YR, &witness.y_r);
+        write_bytes(row, 0, cols::K, &witness.k);
+        row[cols::LEN_K] = FE::from(witness.len_k as u64);
+        write_bytes(row, 0, cols::XG, &witness.x_g);
+        write_bytes(row, 0, cols::YG, &witness.y_g);
+        write_bytes(row, 0, cols::X2, &witness.x2);
+        write_bytes(row, 0, cols::Q0, &witness.q0);
+        write_bytes(row, 0, cols::Q1, &witness.q1);
+        write_halfwords(row, 0, cols::K_SUB_N, &witness.k_sub_n);
+        write_halfwords(row, 0, cols::XR_SUB_P, &witness.x_r_sub_p);
+
+        for i in 0..64 {
+            row[cols::c0(i)] = fe_from_i64(witness.c0[i]);
+            row[cols::c1(i)] = fe_from_i64(witness.c1[i]);
+        }
+
+        row[cols::MU] = FE::one();
+    }
+
+    // Padding rows (`mu = 0`) must carry `q1 = p` so the yG carry relation closes: the
+    // `p² − q1·p` offset cancels and the µ-gated `b` term drops. Bytes 0..31 hold p; byte 32
+    // stays 0 (a valid IS_BIT value).
+    for row in rows {
+        write_bytes(row, 0, cols::Q1, &P_BYTES);
+    }
+
+    TraceTable::new_main(data, cols::NUM_COLUMNS, 1)
+}
+
+// =========================================================================
+// Bus value helpers
+// =========================================================================
+
+fn packed(col: usize) -> BusValue {
+    BusValue::Packed {
+        start_column: col, // trace column the bus element is taken from
+        packing: Packing::Direct,
+    }
+}
+
+/// Builds the 24-element MEMW **read** tuple
+/// `[old[8], is_register, base_lo, base_hi, value[8], ts_lo, ts_hi, w2, w4, w8]`.
+///
+/// For a read, `old` repeats `value`; the `w4` slot is always the constant 0.
+#[allow(clippy::too_many_arguments)]
+fn memw_read(
+    value: [BusValue; 8],
+    is_register: u64,
+    base_lo: BusValue,
+    base_hi: BusValue,
+    ts_lo: BusValue,
+    ts_hi: BusValue,
+    w2: u64,
+    w8: u64,
+) -> Vec<BusValue> {
+    let old = value.clone(); // old == value (read)
+
+    let mut tuple = Vec::with_capacity(24);
+    tuple.extend(old);
+    tuple.extend([BusValue::constant(is_register), base_lo, base_hi]);
+    tuple.extend(value);
+    tuple.extend([ts_lo, ts_hi]);
+    // Width flags, in the order w2, w4, w8.
+    tuple.extend([w2, 0, w8].map(BusValue::constant));
+    tuple
+}
+
+/// Builds the 16-element MEMW **write** tuple
+/// `[is_register, base_lo, base_hi, value[8], ts_lo, ts_hi, w2, w4, w8]`
+/// (the MEMW table supplies `old`).
+///
+/// `is_register`, `w2` and `w4` are hard-wired to the constant 0 here.
+fn memw_write(
+    value: [BusValue; 8],
+    base_lo: BusValue,
+    base_hi: BusValue,
+    ts_lo: BusValue,
+    ts_hi: BusValue,
+    w8: u64,
+) -> Vec<BusValue> {
+    let zero = || BusValue::constant(0);
+
+    let mut tuple = Vec::with_capacity(16);
+    tuple.extend([zero(), base_lo, base_hi]); // is_register = 0 (memory)
+    tuple.extend(value);
+    tuple.extend([ts_lo, ts_hi]);
+    tuple.extend([zero(), zero(), BusValue::constant(w8)]); // w2, w4, w8
+    tuple
+}
+
+/// The eight bytes of a 256-bit value at `col + 8*chunk` as MEMW value elements.
+fn dword_bytes(col: usize, chunk: usize) -> [BusValue; 8] {
+    std::array::from_fn(|b| packed(col + 8 * chunk + b)) // b: byte index inside the chunk
+}
+
+/// A register value `[lo, hi, 0, 0, 0, 0, 0, 0]` as MEMW value elements.
+fn register_value(lo_col: usize, hi_col: usize) -> [BusValue; 8] {
+    let mut v: [BusValue; 8] = std::array::from_fn(|_| BusValue::constant(0));
+    v[0] = packed(lo_col); // slot 0: column holding the low word
+    v[1] = packed(hi_col); // slot 1: column holding the high word
+    v
+}
+
+/// Expands the U256BL coordinate starting at `col` into its 32 per-byte bus elements
+/// (the shape both ECSM and ECDAS use for point coordinates on the ECDAS bus).
+pub fn point_coord_busvalues(col: usize) -> Vec<BusValue> {
+    (col..col + 32).map(packed).collect()
+}
+
+// =========================================================================
+// Bus interactions
+// =========================================================================
+
+pub fn bus_interactions() -> Vec<BusInteraction> {
+    let mu = || Multiplicity::Column(cols::MU);
+    let ts_lo = || packed(cols::TIMESTAMP_0);
+    let ts_hi = || packed(cols::TIMESTAMP_1);
+    let mut out = Vec::new();
+
+    // ECALL receiver (mult = mu): [ts_lo, ts_hi, syscall_lo32, syscall_hi32].
+    out.push(BusInteraction::receiver(
+        BusId::Ecall,
+        mu(),
+        vec![
+            ts_lo(),
+            ts_hi(),
+            BusValue::constant(ECSM_SYSCALL_NUMBER & 0xFFFF_FFFF),
+            BusValue::constant(ECSM_SYSCALL_NUMBER >> 32),
+        ],
+    ));
+
+    // read x11 -> addr_xG (register read at ts).
+    out.push(BusInteraction::sender(
+        BusId::Memw,
+        mu(),
+        memw_read(
+            register_value(cols::ADDR_XG_0, cols::ADDR_XG_1),
+            1,
+            BusValue::constant(2 * 11),
+            BusValue::constant(0),
+            ts_lo(),
+            ts_hi(),
+            1,
+            0,
+        ),
+    ));
+    // read xG: 4 doublewords at addr_xG + 8i (ts).
+    for i in 0..4 {
+        let base_lo = BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::ADDR_XG_0,
+            },
+            LinearTerm::Constant((8 * i) as i64),
+        ]);
+        out.push(BusInteraction::sender(
+            BusId::Memw,
+            mu(),
+            memw_read(
+                dword_bytes(cols::XG, i),
+                0,
+                base_lo,
+                packed(cols::ADDR_XG_1),
+                ts_lo(),
+                ts_hi(),
+                0,
+                1,
+            ),
+        ));
+    }
+
+    // read x12 -> addr_k (register read at ts).
+    out.push(BusInteraction::sender(
+        BusId::Memw,
+        mu(),
+        memw_read(
+            register_value(cols::ADDR_K_0, cols::ADDR_K_1),
+            1,
+            BusValue::constant(2 * 12),
+            BusValue::constant(0),
+            ts_lo(),
+            ts_hi(),
+            1,
+            0,
+        ),
+    ));
+    // read k: 4 doublewords at addr_k + 8i (ts).
+    for i in 0..4 {
+        let base_lo = BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::ADDR_K_0,
+            },
+            LinearTerm::Constant((8 * i) as i64),
+        ]);
+        out.push(BusInteraction::sender(
+            BusId::Memw,
+            mu(),
+            memw_read(
+                dword_bytes(cols::K, i),
+                0,
+                base_lo,
+                packed(cols::ADDR_K_1),
+                ts_lo(),
+                ts_hi(),
+                0,
+                1,
+            ),
+        ));
+    }
+
+    // read x10 -> addr_xR (register read at ts + 1).
+    let ts_lo_plus = |d: i64| {
+        BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::TIMESTAMP_0,
+            },
+            LinearTerm::Constant(d),
+        ])
+    };
+    out.push(BusInteraction::sender(
+        BusId::Memw,
+        mu(),
+        memw_read(
+            register_value(cols::ADDR_XR_0, cols::ADDR_XR_1),
+            1,
+            BusValue::constant(2 * 10),
+            BusValue::constant(0),
+            ts_lo_plus(1),
+            ts_hi(),
+            1,
+            0,
+        ),
+    ));
+    // write xR: 4 doublewords at addr_xR + 8i (ts + 2).
+    for i in 0..4 {
+        let base_lo = BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: cols::ADDR_XR_0,
+            },
+            LinearTerm::Constant((8 * i) as i64),
+        ]);
+        out.push(BusInteraction::sender(
+            BusId::Memw,
+            mu(),
+            memw_write(
+                dword_bytes(cols::XR, i),
+                base_lo,
+                packed(cols::ADDR_XR_1),
+                ts_lo_plus(2),
+                ts_hi(),
+                1,
+            ),
+        ));
+    }
+
+    // IS_BYTE range checks (single byte → AreBytes[x, 0]).
+    let is_byte = |col: usize, len: usize, out: &mut Vec<BusInteraction>| {
+        for i in 0..len {
+            out.push(BusInteraction::sender(
+                BusId::AreBytes,
+                Multiplicity::Column(cols::MU),
+                vec![packed(col + i), BusValue::constant(0)],
+            ));
+        }
+    };
+    is_byte(cols::X2, 32, &mut out);
+    is_byte(cols::Q0, 32, &mut out);
+    is_byte(cols::YG, 32, &mut out);
+    is_byte(cols::Q1, 32, &mut out); // q1[0..31]; q1[32] is an IS_BIT constraint
+
+    // IS_HALF range checks: c0[i]+8160, c1[i]+16319 (i=0..62), then k_sub_N / xR_sub_p.
+    let half_offset = |col: usize, off: i64| {
+        BusValue::linear(vec![
+            LinearTerm::Column {
+                coefficient: 1,
+                column: col,
+            },
+            LinearTerm::Constant(off),
+        ])
+    };
+    for i in 0..63 {
+        out.push(BusInteraction::sender(
+            BusId::IsHalfword,
+            mu(),
+            vec![half_offset(cols::c0(i), 8160)],
+        ));
+    }
+    for i in 0..63 {
+        out.push(BusInteraction::sender(
+            BusId::IsHalfword,
+            mu(),
+            vec![half_offset(cols::c1(i), 16319)],
+        ));
+    }
+    for i in 0..16 {
+        out.push(BusInteraction::sender(
+            BusId::IsHalfword,
+            mu(),
+            vec![packed(cols::k_sub_n(i))],
+        ));
+    }
+    for i in 0..16 {
+        out.push(BusInteraction::sender(
+            BusId::IsHalfword,
+            mu(),
+            vec![packed(cols::xr_sub_p(i))],
+        ));
+    }
+
+    // ZERO bus: assert k != 0 (sum of k's 32 bytes is nonzero).
+    out.push(BusInteraction::sender(
+        BusId::Zero,
+        mu(),
+        vec![
+            BusValue::linear(
+                (0..32)
+                    .map(|i| LinearTerm::Column {
+                        coefficient: 1,
+                        column: cols::k(i),
+                    })
+                    .collect(),
+            ),
+            BusValue::constant(0), // expected ZERO output = 0  ⇒  input is nonzero
+        ],
+    ));
+
+    // Delegation buses.
+    // SERVE_K send: [ts, addr_k, 31].
+    out.push(BusInteraction::sender(
+        BusId::ServeK,
+        mu(),
+        vec![
+            ts_lo(),
+            ts_hi(),
+            packed(cols::ADDR_K_0),
+            packed(cols::ADDR_K_1),
+            BusValue::constant(31),
+        ],
+    ));
+    // BIT receiver: the MSB at position len_k.
+    out.push(BusInteraction::receiver(
+        BusId::Bit,
+        mu(),
+        vec![ts_lo(), ts_hi(), packed(cols::LEN_K)],
+    ));
+    // ECDAS start: [ts, xG, yG, xG, yG, len_k - 1, 0].
+    out.push(BusInteraction::sender(
+        BusId::Ecdas,
+        mu(),
+        ecdas_tuple(
+            cols::XG,
+            cols::YG,
+            cols::XG,
+            cols::YG,
+            BusValue::linear(vec![
+                LinearTerm::Column {
+                    coefficient: 1,
+                    column: cols::LEN_K,
+                },
+                LinearTerm::Constant(-1),
+            ]),
+            BusValue::constant(0),
+            ts_lo(),
+            ts_hi(),
+        ),
+    ));
+    // ECDAS final receiver: [ts, xR, yR, xG, yG, -1, 0].
+    out.push(BusInteraction::receiver(
+        BusId::Ecdas,
+        mu(),
+        ecdas_tuple(
+            cols::XR,
+            cols::YR,
+            cols::XG,
+            cols::YG,
+            BusValue::linear(vec![LinearTerm::Constant(-1)]),
+            BusValue::constant(0),
+            ts_lo(),
+            ts_hi(),
+        ),
+    ));
+
+    out
+}
+
+/// Assembles one ECDAS bus message, laid out as `[ts_lo, ts_hi, accX(32), accY(32),
+/// genX(32), genY(32), round, op]`. Each `usize` argument is handed to
+/// `point_coord_busvalues`, which supplies that coordinate's bus values. Shared so the
+/// ECSM sender and the ECDAS receiver/sender pack the message identically.
+#[allow(clippy::too_many_arguments)]
+pub fn ecdas_tuple(
+    acc_x: usize,
+    acc_y: usize,
+    gen_x: usize,
+    gen_y: usize,
+    round: BusValue,
+    op: BusValue,
+    ts_lo: BusValue,
+    ts_hi: BusValue,
+) -> Vec<BusValue> {
+    // Same capacity hint as the layout above: 2 + 4·32 + 2 entries.
+    let mut tuple = Vec::with_capacity(2 + 4 * 32 + 2);
+    tuple.extend([ts_lo, ts_hi]);
+    for coord in [acc_x, acc_y, gen_x, gen_y] {
+        tuple.extend(point_coord_busvalues(coord));
+    }
+    tuple.extend([round, op]);
+    tuple
+}
+
+// =========================================================================
+// Constraints
+// =========================================================================
+
+/// Selects which of the two byte-limb convolution identities a [`ConvCarry`] enforces.
+#[derive(Clone, Copy)]
+pub enum Relation {
+    /// `xG² − x2 − q0·p = 0`; its carries are read from the `C0` columns.
+    X2,
+    /// `yG² + p² − xG·x2 − b − q1·p = 0`; its carries are read from the `C1` columns.
+    Yg,
+}
+
+/// Limb `m` of the constant table `P_BYTES` as a field element; zero for `m >= 32`.
+fn p_byte<F: IsField>(m: usize) -> FieldElement<F> {
+    match m {
+        0..=31 => FieldElement::from(P_BYTES[m] as u64),
+        _ => FieldElement::zero(),
+    }
+}
+
+/// Carry constraint for limb `i` of a byte-limb convolution: `2^8·c_i − c_{i-1} − S_i = 0`,
+/// with `c_{-1} = 0`. It is not gated by `µ` (degree 2); the single µ-dependent term is the
+/// curve constant `µ·b`, which enters `S_i` only for the yG relation at limb 0.
+pub struct ConvCarry {
+    pub relation: Relation, // identity being checked; also selects the carry column base
+    pub i: usize, // limb index; `create_constraints` instantiates 0..64
+    pub constraint_idx: usize, // value reported by `constraint_idx()`
+}
+
+impl ConvCarry {
+    /// Evaluates `S_i`, the limb-`i` coefficient of the selected relation, on row 0 of `step`.
+    /// Reads past a column group's length (and `p` limbs past 31) contribute zero.
+    fn s_i<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let limb = self.i;
+        let at = |c: usize| -> FieldElement<F> { step.get_main_evaluation_element(0, c).clone() };
+        // Zero-extended read of element `j` from the `len`-column group starting at `base`.
+        let padded = |base: usize, len: usize, j: usize| -> FieldElement<F> {
+            if j >= len {
+                return FieldElement::zero();
+            }
+            at(base + j)
+        };
+        let mut acc = FieldElement::<F>::zero();
+        match self.relation {
+            Relation::X2 => {
+                // −x2_i + Σ_j (xG_j·xG_{i-j} − q0_j·P_{i-j}), pairing j with k = i − j.
+                acc = acc - padded(cols::X2, 32, limb);
+                for (j, k) in (0..=limb).zip((0..=limb).rev()) {
+                    acc += padded(cols::XG, 32, j) * padded(cols::XG, 32, k);
+                    acc = acc - padded(cols::Q0, 32, j) * p_byte::<F>(k);
+                }
+            }
+            Relation::Yg => {
+                // Σ_j (yG_j·yG_{i-j} + P_j·P_{i-j} − x2_j·xG_{i-j} − q1_j·P_{i-j}) − b_i
+                for (j, k) in (0..=limb).zip((0..=limb).rev()) {
+                    acc += padded(cols::YG, 32, j) * padded(cols::YG, 32, k);
+                    acc += p_byte::<F>(j) * p_byte::<F>(k);
+                    acc = acc - padded(cols::X2, 32, j) * padded(cols::XG, 32, k);
+                    acc = acc - padded(cols::Q1, 33, j) * p_byte::<F>(k);
+                }
+                if limb == 0 {
+                    // The curve constant only reaches limb 0 (`B_i = 0` for i ≥ 1) and is the
+                    // one µ-gated term: it contributes `b` when µ = 1 and nothing when µ = 0.
+                    // Everything else in the relation stays unconditional.
+                    acc = acc - at(cols::MU) * FieldElement::<F>::from(B);
+                }
+            }
+        }
+        acc
+    }
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for ConvCarry {
+    /// Degree-2 convolution products dominate; the only µ-gated term (`µ·b`) is degree 1.
+    fn degree(&self) -> usize {
+        2
+    }
+
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+
+    /// Returns `2^8·c_i − c_{i-1} − S_i`, taking `c_{-1} = 0` at limb 0.
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let carries = match self.relation {
+            Relation::X2 => cols::C0,
+            Relation::Yg => cols::C1,
+        };
+        let carry_at = |limb: usize| step.get_main_evaluation_element(0, carries + limb).clone();
+        let incoming = match self.i.checked_sub(1) {
+            Some(prev) => carry_at(prev),
+            None => FieldElement::<F>::zero(),
+        };
+        FieldElement::<F>::from(256u64) * carry_at(self.i) - incoming - self.s_i(step)
+    }
+}
+
+/// Ungated degree-1 constraint `col = 0`; used to close a carry chain by forcing `c_63 = 0`.
+pub struct ColIsZero {
+    pub col: usize, // main-trace column whose cell must be zero
+    pub constraint_idx: usize, // value reported by `constraint_idx()`
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for ColIsZero {
+    fn degree(&self) -> usize {
+        1 // the evaluation is a single trace cell
+    }
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        step.get_main_evaluation_element(0, self.col).clone() // the cell itself is the residual
+    }
+}
+
+/// The two 256-bit strict-inequality checks (`k < N` and `xR < p`), each phrased as an
+/// addition whose 8 word-carries `c_i = 2^-32·(addend0_i + addend1_i + c_{i-1} − sum_i)` are
+/// virtual (recomputed by `carry_chain`, not stored). The addition must overflow `2^256`
+/// (carry-out `c_7 = 1`): `k < N` is `N + k_sub_N = k + 2^256` with `k_sub_N = k − N mod 2^256`,
+/// and `xR < p` is `p + xR_sub_p = xR + 2^256` with `xR_sub_p = xR − p mod 2^256`.
+#[derive(Clone, Copy)]
+pub enum OverflowKind {
+    KLtN, // the `k < N` check
+    XrLtP, // the `xR < p` check
+}
+
+impl OverflowKind {
+    /// 32-bit word `i` of the constant addend (`N` for `k<N`, `p` for `xR<p`).
+    ///
+    /// The word is assembled from bytes `4i..4i+4` of the matching byte table.
+    fn const_word(self, i: usize) -> u64 {
+        let table = match self {
+            Self::KLtN => &N_BYTES,
+            Self::XrLtP => &P_BYTES,
+        };
+        let byte_at = |b: usize| table[4 * i + b] as u64;
+        // Little-endian: byte `b` of the word lands at bit offset `8·b`.
+        (0..4).map(|b| byte_at(b) << (8 * b)).sum()
+    }
+    /// Base column of the witnessed halfword addend (`k_sub_N` / `xR_sub_p`).
+    fn addend_hl_base(self) -> usize {
+        match self {
+            Self::KLtN => cols::K_SUB_N,
+            Self::XrLtP => cols::XR_SUB_P,
+        }
+    }
+    /// Base column of the byte-decomposed sum (`k` / `xR`).
+    fn sum_bl_base(self) -> usize {
+        match self {
+            Self::KLtN => cols::K,
+            Self::XrLtP => cols::XR,
+        }
+    }
+}
+
+/// Recomputes the 8 virtual word-carries of the 256-bit addition selected by `kind`.
+///
+/// For each 32-bit word `i`, `c_i = 2^-32·(const_i + addend_i + c_{i-1} − sum_i)` with
+/// `c_{-1} = 0`; entry `i` of the result is `c_i`. Nothing here forces a carry to be a bit.
+fn carry_chain<F, E>(kind: OverflowKind, step: &TableView<F, E>) -> [FieldElement<F>; 8]
+where
+    F: IsSubFieldOf<E>,
+    E: IsField,
+{
+    let inv_2_32 = FieldElement::<F>::from(INV_SHIFT_32);
+    // Row-0 cell reads, optionally weighted by a power of two.
+    let cell = |c: usize| step.get_main_evaluation_element(0, c).clone();
+    let scaled = |c: usize, shift: usize| cell(c) * FieldElement::<F>::from(1u64 << shift);
+    let (halves, bytes) = (kind.addend_hl_base(), kind.sum_bl_base());
+    // `from_fn` visits indices in increasing order, so this always holds `c_{i-1}`.
+    let mut carry_in = FieldElement::<F>::zero();
+    std::array::from_fn(|i| {
+        // Witnessed addend word i (from halfwords): hl[2i] + 2^16·hl[2i+1].
+        let witnessed = cell(halves + 2 * i) + scaled(halves + 2 * i + 1, 16);
+        // Sum word i (from bytes): Σ bl[4i+b]·2^{8b}.
+        let total = (0..4).fold(FieldElement::<F>::zero(), |acc, b| {
+            acc + scaled(bytes + 4 * i + b, 8 * b)
+        });
+        // Constant addend word i (`N` or `p`, depending on `kind`).
+        let fixed = FieldElement::<F>::from(kind.const_word(i));
+        carry_in = (fixed + witnessed + carry_in.clone() - total) * inv_2_32.clone();
+        carry_in.clone()
+    })
+}
+
+/// Booleanity check `µ · c_i · (1 − c_i) = 0` for virtual carry `i` of the `kind` addition.
+/// Degree 3: `c_i` is linear in the trace cells, appears twice, and is gated by `µ`.
+pub struct CarryBit {
+    pub kind: OverflowKind,
+    pub i: usize,
+    pub constraint_idx: usize,
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for CarryBit {
+    fn degree(&self) -> usize {
+        3
+    }
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let carry = carry_chain(self.kind, step)[self.i].clone();
+        let gate = step.get_main_evaluation_element(0, cols::MU).clone();
+        gate * carry.clone() * (FieldElement::<F>::one() - carry)
+    }
+}
+
+/// `µ · (1 − c_7) = 0`: whenever `µ = 1`, the carry-out of the `kind` addition must be 1.
+pub struct OverflowRequired {
+    pub kind: OverflowKind,
+    pub constraint_idx: usize,
+}
+
+impl TransitionConstraint<GoldilocksField, GoldilocksExtension> for OverflowRequired {
+    fn degree(&self) -> usize {
+        2 // µ times the linear carry-out
+    }
+    fn constraint_idx(&self) -> usize {
+        self.constraint_idx
+    }
+    fn evaluate<F, E>(&self, step: &TableView<F, E>) -> FieldElement<F>
+    where
+        F: IsSubFieldOf<E>,
+        E: IsField,
+    {
+        let [.., carry_out] = carry_chain(self.kind, step);
+        let gate = step.get_main_evaluation_element(0, cols::MU).clone();
+        gate * (FieldElement::<F>::one() - carry_out)
+    }
+}
+
+/// Creates the 148 ECSM transition constraints; returns them with the next unused index.
+pub fn create_constraints(
+    constraint_idx_start: usize,
+) -> (
+    Vec<Box<dyn TransitionConstraintEvaluator<GoldilocksField, GoldilocksExtension>>>,
+    usize,
+) {
+    let mut constraints: Vec<
+        Box<dyn TransitionConstraintEvaluator<GoldilocksField, GoldilocksExtension>>,
+    > = Vec::new();
+    let mut idx = constraint_idx_start;
+
+    // IS_BIT(mu): 1 constraint.
+    constraints.push(IsBitConstraint::unconditional(cols::MU, idx).boxed());
+    idx += 1;
+
+    // x2 convolution: 64 carry limbs + closing `c0[63] = 0` (65 constraints).
+    for i in 0..64 {
+        constraints.push(
+            ConvCarry {
+                relation: Relation::X2,
+                i,
+                constraint_idx: idx,
+            }
+            .boxed(),
+        );
+        idx += 1;
+    }
+    constraints.push(
+        ColIsZero {
+            col: cols::c0(63),
+            constraint_idx: idx,
+        }
+        .boxed(),
+    );
+    idx += 1;
+
+    // yG convolution: 64 carry limbs + closing `c1[63] = 0` (65 constraints).
+    for i in 0..64 {
+        constraints.push(
+            ConvCarry {
+                relation: Relation::Yg,
+                i,
+                constraint_idx: idx,
+            }
+            .boxed(),
+        );
+        idx += 1;
+    }
+    constraints.push(
+        ColIsZero {
+            col: cols::c1(63),
+            constraint_idx: idx,
+        }
+        .boxed(),
+    );
+    idx += 1;
+
+    // IS_BIT(q1[32]): the 33rd q1 limb is constrained to a single bit (1 constraint).
+    constraints.push(IsBitConstraint::unconditional(cols::q1(32), idx).boxed());
+    idx += 1;
+
+    // k < N: booleanity of carries 0..6 + carry-out `c_7 = 1` (8 constraints).
+    for i in 0..7 {
+        constraints.push(
+            CarryBit {
+                kind: OverflowKind::KLtN,
+                i,
+                constraint_idx: idx,
+            }
+            .boxed(),
+        );
+        idx += 1;
+    }
+    constraints.push(
+        OverflowRequired {
+            kind: OverflowKind::KLtN,
+            constraint_idx: idx,
+        }
+        .boxed(),
+    );
+    idx += 1;
+
+    // xR < p: booleanity of carries 0..6 + carry-out `c_7 = 1` (8 constraints).
+    for i in 0..7 {
+        constraints.push(
+            CarryBit {
+                kind: OverflowKind::XrLtP,
+                i,
+                constraint_idx: idx,
+            }
+            .boxed(),
+        );
+        idx += 1;
+    }
+    constraints.push(
+        OverflowRequired {
+            kind: OverflowKind::XrLtP,
+            constraint_idx: idx,
+        }
+        .boxed(),
+    );
+    idx += 1;
+
+    (constraints, idx)
+}
diff --git a/prover/src/tables/mod.rs b/prover/src/tables/mod.rs
index 4401307a9..50bc399af 100644
--- a/prover/src/tables/mod.rs
+++ b/prover/src/tables/mod.rs
@@ -29,6 +29,9 @@ pub mod cpu;
 pub mod cpu32;
 pub mod decode;
 pub mod dvrm;
+pub mod ec_scalar;
+pub mod ecdas;
+pub mod ecsm;
 pub mod eq;
 pub mod halt;
 pub mod keccak;
diff --git a/prover/src/tables/trace_builder.rs b/prover/src/tables/trace_builder.rs
index e9fa9b7d3..7947fa088 100644
--- a/prover/src/tables/trace_builder.rs
+++ b/prover/src/tables/trace_builder.rs
@@ -45,6 +45,9 @@ use super::cpu::{self, CpuOperation};
 use super::cpu32;
 use super::decode;
 use super::dvrm::{self, DvrmOperation};
+use super::ec_scalar;
+use super::ecdas;
+use super::ecsm;
 use super::eq;
 use super::halt;
 use super::keccak::{self, KeccakOperation};
@@ -350,7 +353,8 @@ fn collect_cpu_ops(
 ///
 /// MEMW and LOAD collection requires sequential processing with state tracking.
 ///
-/// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops)
+/// Returns: (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops,
+/// cpu32_ops, ecsm_ops, ec_scalar_ops, ecdas_ops)
 #[allow(clippy::type_complexity)]
 fn collect_ops_from_cpu(
     cpu_ops: &[CpuOperation],
@@ -365,6 +369,9 @@ fn collect_ops_from_cpu(
     Vec<CommitOperation>,
     Vec<KeccakOperation>,
     Vec<cpu32::Cpu32Operation>,
+    Vec<ecsm::EcsmOperation>,
+    Vec<ec_scalar::EcScalarOperation>,
+    Vec<ecdas::EcdasOperation>,
 ) {
     let mut memw_ops = Vec::with_capacity(cpu_ops.len() * 3);
     let mut load_ops = Vec::with_capacity(cpu_ops.len() / 8 + 1);
@@ -374,6 +381,9 @@ fn collect_ops_from_cpu(
     let mut commit_ops = Vec::new();
     let mut keccak_ops = Vec::new();
     let mut cpu32_ops = Vec::new();
+    let mut ecsm_ops = Vec::new();
+    let mut ec_scalar_ops = Vec::new();
+    let mut ecdas_ops = Vec::new();
     let mut current_commit_index = 0u32;
     let mut commit_ecall_count = 0u32;
 
@@ -455,6 +465,16 @@ fn collect_ops_from_cpu(
             });
         }
 
+        // Collect ECSM ecall operations (memory I/O + the three table row sets)
+        if op.ecall_ecsm {
+            let (ecsm_memw, ecsm_op, ec_scalar_rows, ecdas_rows) =
+                collect_ecsm_ops(op, memory_state, register_state);
+            memw_ops.extend(ecsm_memw);
+            ecsm_ops.push(ecsm_op);
+            ec_scalar_ops.extend(ec_scalar_rows);
+            ecdas_ops.extend(ecdas_rows);
+        }
+
         // --- ALU chip dispatch (no state tracking) ---
         // Word (`*W`) instructions are delegated to CPU32 (which itself drives
         // the ALU chips); the main CPU does not send the ALU bus for them, so we
@@ -505,6 +525,9 @@ fn collect_ops_from_cpu(
         commit_ops,
         keccak_ops,
         cpu32_ops,
+        ecsm_ops,
+        ec_scalar_ops,
+        ecdas_ops,
     )
 }
 
@@ -612,6 +635,128 @@ fn collect_store_op_from_cpu(op: &CpuOperation, memory_state: &mut MemoryState)
     memw_op
 }
 
+/// Collects the 47 MEMW ops plus the ECSM / EC_SCALAR / ECDAS table ops for one ECSM ecall.
+/// Panics if `::ecsm::compute_witness` rejects the `k` / `xG` operands read from memory.
+///
+/// Timestamps (`T = op.timestamp`): the `x11`/`x12` register reads and the `xG`/`k` dword
+/// reads use `T`; the `x10` register read and the 32 EC_SCALAR byte reads use `T + 1`; the
+/// `xR` dword writes use `T + 2`. Each access is recorded with the previous timestamp of its
+/// cells and then stamps them with its own timestamp in `memory_state` / `register_state`.
+#[allow(clippy::needless_range_loop)]
+fn collect_ecsm_ops(
+    op: &CpuOperation,
+    memory_state: &mut MemoryState,
+    register_state: &mut RegisterState,
+) -> (
+    Vec<MemwOperation>,
+    ecsm::EcsmOperation,
+    Vec<ec_scalar::EcScalarOperation>,
+    Vec<ecdas::EcdasOperation>,
+) {
+    let t = op.timestamp;
+    let addr_xr = register_state.read(10).0;
+    let addr_xg = register_state.read(11).0;
+    let addr_k = register_state.read(12).0;
+
+    // Fetch the xG and k operands (32 bytes each) from memory to build the witness.
+    let mut xg = [0u8; 32];
+    let mut k = [0u8; 32];
+    for i in 0..32 {
+        xg[i] = memory_state.read_byte(addr_xg.wrapping_add(i as u64)).0;
+        k[i] = memory_state.read_byte(addr_k.wrapping_add(i as u64)).0;
+    }
+
+    let witness = ::ecsm::compute_witness(&k, &xg)
+        .expect("ECSM witness: executor validates 0 < k < N and xG on curve");
+
+    let mut memw_ops = Vec::with_capacity(47); // 3 reg + 8 dword + 32 byte reads, 4 writes
+
+    // x11 -> addr_xG, x12 -> addr_k (register reads at T; the value is written back unchanged).
+    for reg in [11u8, 12u8] {
+        let (val, old_ts) = register_state.read(reg);
+        let value = pack_register_value(val);
+        memw_ops.push(
+            MemwOperation::new(true, 2 * reg as u64, value, t, 2, true)
+                .with_old(value, [old_ts, old_ts, 0, 0, 0, 0, 0, 0]),
+        );
+        register_state.write(reg, val, t);
+    }
+
+    // xG and k: 4 doubleword reads each at T, using the witness bytes as both old and new value.
+    for (base, bytes) in [(addr_xg, &witness.x_g), (addr_k, &witness.k)] {
+        for i in 0..4 {
+            let addr = base.wrapping_add((8 * i) as u64);
+            let mut value = [0u64; 8];
+            let mut dword = 0u64;
+            for j in 0..8 {
+                value[j] = bytes[8 * i + j] as u64;
+                dword |= (bytes[8 * i + j] as u64) << (8 * j);
+            }
+            let (_old, old_ts) = memory_state.read_bytes(addr, 8);
+            memw_ops
+                .push(MemwOperation::new(false, addr, value, t, 8, true).with_old(value, old_ts));
+            memory_state.write_bytes(addr, dword, 8, t);
+        }
+    }
+
+    // x10 -> addr_xR (register read at T + 1; the value is written back unchanged).
+    {
+        let (val, old_ts) = register_state.read(10);
+        let value = pack_register_value(val);
+        memw_ops.push(
+            MemwOperation::new(true, 2 * 10, value, t + 1, 2, true)
+                .with_old(value, [old_ts, old_ts, 0, 0, 0, 0, 0, 0]),
+        );
+        register_state.write(10, val, t + 1);
+    }
+
+    // EC_SCALAR byte reads of k at T + 1 (one single-byte MEMW op per scalar byte).
+    for offset in 0..32u64 {
+        let addr = addr_k.wrapping_add(offset);
+        let byte = k[offset as usize];
+        let value = [byte as u64, 0, 0, 0, 0, 0, 0, 0];
+        let (_v, old_ts) = memory_state.read_byte(addr);
+        memw_ops.push(
+            MemwOperation::new(false, addr, value, t + 1, 1, true)
+                .with_old(value, [old_ts, 0, 0, 0, 0, 0, 0, 0]),
+        );
+        memory_state.write_byte(addr, byte, t + 1);
+    }
+
+    // xR writes at T + 2 (4 doublewords); the old values come from the current memory state.
+    for i in 0..4 {
+        let addr = addr_xr.wrapping_add((8 * i) as u64);
+        let mut value = [0u64; 8];
+        let mut dword = 0u64;
+        for j in 0..8 {
+            value[j] = witness.x_r[8 * i + j] as u64;
+            dword |= (witness.x_r[8 * i + j] as u64) << (8 * j);
+        }
+        let (old_vals, old_ts) = memory_state.read_bytes(addr, 8);
+        memw_ops.push(
+            MemwOperation::new(false, addr, value, t + 2, 8, false).with_old(old_vals, old_ts),
+        );
+        memory_state.write_bytes(addr, dword, 8, t + 2);
+    }
+
+    let ec_scalar_ops = ec_scalar::rows_for_scalar(t, addr_k, &witness.k);
+    let ecdas_ops = witness
+        .steps
+        .iter()
+        .cloned()
+        .map(|step| ecdas::EcdasOperation { timestamp: t, step })
+        .collect();
+    let ecsm_op = ecsm::EcsmOperation {
+        timestamp: t,
+        addr_xg,
+        addr_k,
+        addr_xr,
+        witness,
+    };
+
+    (memw_ops, ecsm_op, ec_scalar_ops, ecdas_ops)
+}
+
 /// Collects register read/write operations (M1, M3, M5) from CpuOperation.
 ///
 /// Returns: Vec of MEMW operations for register accesses
@@ -1857,6 +2002,81 @@ fn collect_bitwise_from_commit(commit_ops: &[CommitOperation]) -> Vec<BitwiseOpe
 /// Generates PAGE tables for memory initialization and finalization.
 ///
 /// Derives all page bases from `memory_state.cells.keys()` — this includes
+/// IS_HALF lookup for a value `v ∈ [0, 2^16)`.
+///
+/// The halfword is handed to the BITWISE operation as two bytes, low byte first.
+fn is_half_op(v: u16) -> BitwiseOperation {
+    // Little-endian split: `lo = v & 0xFF`, `hi = v >> 8`.
+    let [lo, hi] = v.to_le_bytes();
+    BitwiseOperation::halfword(BitwiseOperationType::IsHalf, lo, hi)
+}
+
+/// IS_BYTE lookup for one byte, encoded as the `AreBytes` pair `[b, 0]` (second byte fixed at 0).
+fn is_byte_op(b: u8) -> BitwiseOperation {
+    BitwiseOperation::byte_op(BitwiseOperationType::AreBytes, b, 0)
+}
+
+/// BITWISE lookups sent by the ECSM core table, replayed here so the BITWISE receiver
+/// multiplicities account for them.
+///
+/// Per op: 128 IS_BYTE, 158 IS_HALF and one ZERO lookup (the `k != 0` check).
+#[allow(clippy::needless_range_loop)]
+pub(crate) fn collect_bitwise_from_ecsm(ops: &[ecsm::EcsmOperation]) -> Vec<BitwiseOperation> {
+    let mut lookups = Vec::new();
+    for op in ops {
+        let wit = &op.witness;
+        // IS_BYTE on x2, q0, yG, q1[0..31], interleaved limb by limb.
+        for i in 0..32 {
+            lookups.extend([wit.x2[i], wit.q0[i], wit.y_g[i], wit.q1[i]].map(is_byte_op));
+        }
+        // IS_HALF on the offset carries c0[i]+8160, c1[i]+16319 (i = 0..62).
+        for i in 0..63 {
+            let shifted = [(wit.c0[i] + 8160) as u16, (wit.c1[i] + 16319) as u16];
+            lookups.extend(shifted.map(is_half_op));
+        }
+        // IS_HALF on the U256HL limbs of k_sub_N and xR_sub_p; limb i packs bytes 2i
+        // (low) and 2i+1 (high).
+        for i in 0..16 {
+            let (lo, hi) = (2 * i, 2 * i + 1);
+            let halves = [
+                wit.k_sub_n[lo] as u16 + ((wit.k_sub_n[hi] as u16) << 8),
+                wit.x_r_sub_p[lo] as u16 + ((wit.x_r_sub_p[hi] as u16) << 8),
+            ];
+            lookups.extend(halves.map(is_half_op));
+        }
+        // ZERO: assert k != 0 (sum of k's bytes).
+        let byte_total: u32 = wit.k.iter().map(|&b| b as u32).sum();
+        lookups.push(BitwiseOperation::zero(byte_total));
+    }
+    lookups
+}
+
+/// BITWISE lookups sent by every ECDAS row (range checks on the byte limbs + carries),
+/// emitted per row as: `round`, 32×3 bytes, 33×3 `q` bytes, then 63×3 offset carries.
+#[allow(clippy::needless_range_loop)]
+pub(crate) fn collect_bitwise_from_ecdas(ops: &[ecdas::EcdasOperation]) -> Vec<BitwiseOperation> {
+    let mut lookups = Vec::new();
+    for op in ops {
+        let row = &op.step;
+        lookups.push(is_byte_op(row.round));
+        for i in 0..32 {
+            lookups.extend([row.lambda[i], row.x_r[i], row.y_r[i]].map(is_byte_op));
+        }
+        for i in 0..33 {
+            lookups.extend([row.q0[i], row.q1[i], row.q2[i]].map(is_byte_op));
+        }
+        for i in 0..63 {
+            let offset = [
+                (row.c0[i] + 32636) as u16,
+                (row.c1[i] + 8161) as u16,
+                (row.c2[i] + 16320) as u16,
+            ];
+            lookups.extend(offset.map(is_half_op));
+        }
+    }
+    lookups
+}
+
 /// Collect BITWISE lookups generated by the keccak chips.
 ///
 /// The keccak round chip sends BYTE_ALU, HWSL, and ARE_BYTES
@@ -2238,6 +2458,15 @@ pub struct Traces {
     /// KECCAK_RC precomputed round constant table (32 rows)
     pub keccak_rc: TraceTable<GoldilocksField, GoldilocksExtension>,
 
+    /// ECSM core table (one row per scalar-multiplication ecall)
+    pub ecsm: TraceTable<GoldilocksField, GoldilocksExtension>,
+
+    /// EC_SCALAR table (32 rows per ecall)
+    pub ec_scalar: TraceTable<GoldilocksField, GoldilocksExtension>,
+
+    /// ECDAS double/add table (variable rows per ecall)
+    pub ecdas: TraceTable<GoldilocksField, GoldilocksExtension>,
+
     /// MEMW_R register-only fast-path traces (split into chunks of max_rows::MEMW_R)
     pub memw_registers: Vec<TraceTable<GoldilocksField, GoldilocksExtension>>,
     // Auxiliary ALU / memory / CPU32 dispatch chips (split into chunks of their max_rows)
@@ -2268,6 +2497,10 @@ struct CollectedOps {
     bytewise_ops: Vec<bytewise::BytewiseOperation>,
     store_ops: Vec<store::StoreOperation>,
     cpu32_ops: Vec<cpu32::Cpu32Operation>,
+    // EC scalar-multiplication accelerator chips.
+    ecsm_ops: Vec<ecsm::EcsmOperation>,
+    ec_scalar_ops: Vec<ec_scalar::EcScalarOperation>,
+    ecdas_ops: Vec<ecdas::EcdasOperation>,
 }
 
 /// Chunk raw ops and generate one trace table per chunk. When `storage_mode`
@@ -2314,6 +2547,9 @@ fn collect_all_ops(
     commit_ops: Vec<CommitOperation>,
     keccak_ops: Vec<KeccakOperation>,
     cpu32_ops: Vec<cpu32::Cpu32Operation>,
+    ecsm_ops: Vec<ecsm::EcsmOperation>,
+    ec_scalar_ops: Vec<ec_scalar::EcScalarOperation>,
+    ecdas_ops: Vec<ecdas::EcdasOperation>,
     register_state: &mut RegisterState,
 ) -> CollectedOps {
     // HALT finalization: 33 register MEMW operations at timestamp u64::MAX.
@@ -2445,6 +2681,9 @@ fn collect_all_ops(
         bytewise_ops,
         store_ops,
         cpu32_ops,
+        ecsm_ops,
+        ec_scalar_ops,
+        ecdas_ops,
     }
 }
 
@@ -2483,6 +2722,9 @@ fn build_traces(
         bytewise_ops,
         store_ops,
         cpu32_ops,
+        ecsm_ops,
+        ec_scalar_ops,
+        ecdas_ops,
     } = ops;
 
     // =====================================================================
@@ -2526,6 +2768,8 @@ fn build_traces(
     bitwise_ops.extend(collect_bitwise_from_commit(&commit_ops));
     // KECCAK_RND sends XOR/AND/ARE_BYTES/HWSL; KECCAK core sends IS_HALF
     bitwise_ops.extend(collect_bitwise_from_keccak(&keccak_ops));
+    bitwise_ops.extend(collect_bitwise_from_ecsm(&ecsm_ops));
+    bitwise_ops.extend(collect_bitwise_from_ecdas(&ecdas_ops));
 
     // CPU padding rows send ARE_BYTES with all-zero values.
     // Add corresponding ops so the bitwise table multiplicities balance.
@@ -2693,6 +2937,11 @@ fn build_traces(
     let mut keccak_rc_trace = keccak_rc::generate_keccak_rc_trace();
     keccak_rc::update_multiplicities(&mut keccak_rc_trace, keccak_ops.len());
 
+    // ECSM accelerator traces (empty/all-padding for programs that do not use ECSM).
+    let ecsm_trace = ecsm::generate_ecsm_trace(&ecsm_ops);
+    let ec_scalar_trace = ec_scalar::generate_ec_scalar_trace(&ec_scalar_ops);
+    let ecdas_trace = ecdas::generate_ecdas_trace(&ecdas_ops);
+
     #[allow(unused_mut)]
     let (mut pages, page_configs, mut register_trace, mut halt_trace);
     #[cfg(feature = "parallel")]
@@ -2784,6 +3033,9 @@ fn build_traces(
         keccak: keccak_trace,
         keccak_rnd: keccak_rnd_trace,
         keccak_rc: keccak_rc_trace,
+        ecsm: ecsm_trace,
+        ec_scalar: ec_scalar_trace,
+        ecdas: ecdas_trace,
         memw_registers,
         eqs,
         bytewises,
@@ -3037,6 +3289,9 @@ impl Traces {
         use super::decode::NUM_PRECOMPUTED_COLS as DECODE_PRECOMPUTED;
         use super::decode::cols::NUM_COLUMNS as DECODE_COLS;
         use super::dvrm::cols::NUM_COLUMNS as DVRM_COLS;
+        use super::ec_scalar::cols::NUM_COLUMNS as EC_SCALAR_COLS;
+        use super::ecdas::cols::NUM_COLUMNS as ECDAS_COLS;
+        use super::ecsm::cols::NUM_COLUMNS as ECSM_COLS;
         use super::eq::cols::NUM_COLUMNS as EQ_COLS;
         use super::halt::cols::NUM_COLUMNS as HALT_COLS;
         use super::keccak::cols::NUM_COLUMNS as KECCAK_COLS;
@@ -3075,6 +3330,9 @@ impl Traces {
             keccak,
             keccak_rnd,
             keccak_rc,
+            ecsm,
+            ec_scalar,
+            ecdas,
             memw_registers,
             eqs,
             bytewises,
@@ -3138,6 +3396,9 @@ impl Traces {
         for t in cpu32s {
             total += (t.num_rows() * CPU32_COLS) as u64;
         }
+        total += (ecsm.num_rows() * ECSM_COLS) as u64;
+        total += (ec_scalar.num_rows() * EC_SCALAR_COLS) as u64;
+        total += (ecdas.num_rows() * ECDAS_COLS) as u64;
         total
     }
 
@@ -3177,6 +3438,9 @@ impl Traces {
         let n_bytewise = aux_cols(super::bytewise::bus_interactions().len());
         let n_store = aux_cols(super::store::bus_interactions().len());
         let n_cpu32 = aux_cols(super::cpu32::bus_interactions().len());
+        let n_ecsm = aux_cols(super::ecsm::bus_interactions().len());
+        let n_ec_scalar = aux_cols(super::ec_scalar::bus_interactions().len());
+        let n_ecdas = aux_cols(super::ecdas::bus_interactions().len());
 
         let Traces {
             cpus,
@@ -3197,6 +3461,9 @@ impl Traces {
             keccak,
             keccak_rnd,
             keccak_rc,
+            ecsm,
+            ec_scalar,
+            ecdas,
             memw_registers,
             eqs,
             bytewises,
@@ -3260,6 +3527,9 @@ impl Traces {
         for t in cpu32s {
             total += (t.num_rows() * n_cpu32) as u64;
         }
+        total += (ecsm.num_rows() * n_ecsm) as u64;
+        total += (ec_scalar.num_rows() * n_ec_scalar) as u64;
+        total += (ecdas.num_rows() * n_ecdas) as u64;
         total
     }
 
@@ -3418,8 +3688,19 @@ impl Traces {
         let mut memory_state = MemoryState::from_elf(elf);
         memory_state.add_private_input(private_input);
         let mut register_state = RegisterState::new(elf.entry_point);
-        let (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops, cpu32_ops) =
-            collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state);
+        let (
+            memw_ops,
+            load_ops,
+            lt_ops,
+            shift_ops,
+            bitwise_ops,
+            commit_ops,
+            keccak_ops,
+            cpu32_ops,
+            ecsm_ops,
+            ec_scalar_ops,
+            ecdas_ops,
+        ) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state);
 
         let ops = collect_all_ops(
             cpu_ops,
@@ -3431,6 +3712,9 @@ impl Traces {
             commit_ops,
             keccak_ops,
             cpu32_ops,
+            ecsm_ops,
+            ec_scalar_ops,
+            ecdas_ops,
             &mut register_state,
         );
 
@@ -3468,8 +3752,19 @@ impl Traces {
         let mut memory_state = MemoryState::new();
         let entry_point = cpu_ops.first().map_or(0, |op| op.decode.pc);
         let mut register_state = RegisterState::new(entry_point);
-        let (memw_ops, load_ops, lt_ops, shift_ops, bitwise_ops, commit_ops, keccak_ops, cpu32_ops) =
-            collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state);
+        let (
+            memw_ops,
+            load_ops,
+            lt_ops,
+            shift_ops,
+            bitwise_ops,
+            commit_ops,
+            keccak_ops,
+            cpu32_ops,
+            ecsm_ops,
+            ec_scalar_ops,
+            ecdas_ops,
+        ) = collect_ops_from_cpu(&cpu_ops, &mut memory_state, &mut register_state);
 
         let ops = collect_all_ops(
             cpu_ops,
@@ -3481,6 +3776,9 @@ impl Traces {
             commit_ops,
             keccak_ops,
             cpu32_ops,
+            ecsm_ops,
+            ec_scalar_ops,
+            ecdas_ops,
             &mut register_state,
         );
 
diff --git a/prover/src/tables/types.rs b/prover/src/tables/types.rs
index 195b1e005..bc16ce780 100644
--- a/prover/src/tables/types.rs
+++ b/prover/src/tables/types.rs
@@ -128,6 +128,18 @@ pub enum BusId {
     /// CPU → CPU32 delegation of word (`*W`) instructions:
     /// `CPU32[timestamp, pc, instruction_length]`.
     Cpu32 = 27,
+
+    // =========================================================================
+    // EC scalar multiplication accelerator (ECSM / ECDAS / EC_SCALAR)
+    // =========================================================================
+    /// ECDAS self-referential double/add sequence bus:
+    /// (timestamp, xA, yA, xG, yG, round, op). ECSM seeds and drains it.
+    Ecdas = 28,
+    /// EC_SCALAR self-referential scalar-byte server bus: (timestamp, ptr, offset).
+    ServeK = 29,
+    /// Scalar-bit bus: EC_SCALAR sends one per set bit (timestamp, bit_index);
+    /// ECDAS receives one per add, ECSM receives the MSB.
+    Bit = 30,
 }
 
 impl BusId {
@@ -154,6 +166,9 @@ impl BusId {
             BusId::Alu => "Alu",
             BusId::MemoryOp => "MemoryOp",
             BusId::Cpu32 => "Cpu32",
+            BusId::Ecdas => "Ecdas",
+            BusId::ServeK => "ServeK",
+            BusId::Bit => "Bit",
         }
     }
 }
@@ -183,6 +198,9 @@ impl TryFrom<u64> for BusId {
             25 => Ok(BusId::Alu),
             26 => Ok(BusId::MemoryOp),
             27 => Ok(BusId::Cpu32),
+            28 => Ok(BusId::Ecdas),
+            29 => Ok(BusId::ServeK),
+            30 => Ok(BusId::Bit),
             other => Err(other),
         }
     }
diff --git a/prover/src/test_utils.rs b/prover/src/test_utils.rs
index 31434f5ab..fd9d9d40c 100644
--- a/prover/src/test_utils.rs
+++ b/prover/src/test_utils.rs
@@ -58,6 +58,11 @@ use crate::tables::decode::{bus_interactions as decode_bus_interactions, cols as
 use crate::tables::dvrm::{
     bus_interactions as dvrm_bus_interactions, cols as dvrm_cols, dvrm_constraints,
 };
+use crate::tables::ec_scalar::{
+    bus_interactions as ec_scalar_bus_interactions, cols as ec_scalar_cols,
+};
+use crate::tables::ecdas::{bus_interactions as ecdas_bus_interactions, cols as ecdas_cols};
+use crate::tables::ecsm::{bus_interactions as ecsm_bus_interactions, cols as ecsm_cols};
 use crate::tables::eq::{bus_interactions as eq_bus_interactions, cols as eq_cols, eq_constraints};
 use crate::tables::halt::{bus_interactions as halt_bus_interactions, cols as halt_cols};
 use crate::tables::keccak::{bus_interactions as keccak_bus_interactions, cols as keccak_cols};
@@ -1040,3 +1045,51 @@ pub fn create_keccak_rc_air(proof_options: &ProofOptions) -> VmAir {
     )
     .with_name("KECCAK_RC")
 }
+
+/// Create ECSM core AIR (secp256k1 scalar-multiplication orchestrator).
+pub fn create_ecsm_air(proof_options: &ProofOptions) -> VmAir {
+    let (transition_constraints, _) = crate::tables::ecsm::create_constraints(0);
+    let auxiliary_trace_build_data = AuxiliaryTraceBuildData {
+        interactions: ecsm_bus_interactions(),
+    };
+    AirWithBuses::new(
+        ecsm_cols::NUM_COLUMNS,
+        auxiliary_trace_build_data,
+        proof_options,
+        1,
+        transition_constraints,
+    )
+    .with_name("ECSM")
+}
+
+/// Create EC_SCALAR AIR (serves the scalar bit-by-bit to ECDAS).
+pub fn create_ec_scalar_air(proof_options: &ProofOptions) -> VmAir {
+    let (transition_constraints, _) = crate::tables::ec_scalar::create_constraints(0);
+    let auxiliary_trace_build_data = AuxiliaryTraceBuildData {
+        interactions: ec_scalar_bus_interactions(),
+    };
+    AirWithBuses::new(
+        ec_scalar_cols::NUM_COLUMNS,
+        auxiliary_trace_build_data,
+        proof_options,
+        1,
+        transition_constraints,
+    )
+    .with_name("EC_SCALAR")
+}
+
+/// Create ECDAS AIR (per-step double/add of the scalar-multiplication sequence).
+pub fn create_ecdas_air(proof_options: &ProofOptions) -> VmAir {
+    let (transition_constraints, _) = crate::tables::ecdas::create_constraints(0);
+    let auxiliary_trace_build_data = AuxiliaryTraceBuildData {
+        interactions: ecdas_bus_interactions(),
+    };
+    AirWithBuses::new(
+        ecdas_cols::NUM_COLUMNS,
+        auxiliary_trace_build_data,
+        proof_options,
+        1,
+        transition_constraints,
+    )
+    .with_name("ECDAS")
+}
diff --git a/prover/src/tests/ec_scalar_tests.rs b/prover/src/tests/ec_scalar_tests.rs
new file mode 100644
index 000000000..462443843
--- /dev/null
+++ b/prover/src/tests/ec_scalar_tests.rs
@@ -0,0 +1,91 @@
+//! Tests for the EC_SCALAR table — constraint satisfaction on generated traces,
+//! the `last_limb` schedule, and the constraint count.
+
+use crate::constraints::templates::IsBitConstraint;
+use crate::tables::ec_scalar::{
+    MulZeroConstraint, cols, create_constraints, generate_ec_scalar_trace, rows_for_scalar,
+};
+use crate::tables::types::{FE, GoldilocksExtension, GoldilocksField};
+use stark::constraints::transition::TransitionConstraint;
+use stark::table::TableView;
+use stark::trace::TraceTable;
+
+/// Builds a one-row `TableView` for `row` of the trace (constraints only read row 0).
+fn row_view(
+    trace: &TraceTable<GoldilocksField, GoldilocksExtension>,
+    row: usize,
+) -> TableView<GoldilocksField, GoldilocksExtension> {
+    let main: Vec<FE> = (0..cols::NUM_COLUMNS)
+        .map(|c| *trace.main_table.get(row, c))
+        .collect();
+    TableView::new(vec![main], vec![])
+}
+
+#[test]
+fn constraints_hold_on_generated_trace() {
+    let mut k = [0u8; 32];
+    // a scalar with assorted bit patterns across several bytes
+    k[0] = 0b1010_0101;
+    k[1] = 0xFF;
+    k[15] = 0x80;
+    k[31] = 0x01;
+    let ops = rows_for_scalar(444, 0x3000, &k);
+    let trace = generate_ec_scalar_trace(&ops);
+
+    // IS_BIT columns
+    let mut bit_cols = vec![cols::MU];
+    bit_cols.extend((0..8).map(cols::limb_bit));
+    bit_cols.push(cols::LAST_LIMB);
+
+    for row in 0..trace.num_rows() {
+        let view = row_view(&trace, row);
+        for &col in &bit_cols {
+            let v = IsBitConstraint::unconditional(col, 0).evaluate(&view);
+            assert_eq!(v, FE::zero(), "IS_BIT col {col} row {row}");
+        }
+        // implication constraints
+        for i in 0..8 {
+            let c = MulZeroConstraint {
+                a: cols::limb_bit(i),
+                b: cols::MU,
+                b_complement: true,
+                constraint_idx: 0,
+            };
+            assert_eq!(c.evaluate(&view), FE::zero(), "limb_bit{i}=>mu row {row}");
+        }
+        let c = MulZeroConstraint {
+            a: cols::LAST_LIMB,
+            b: cols::MU,
+            b_complement: true,
+            constraint_idx: 0,
+        };
+        assert_eq!(c.evaluate(&view), FE::zero(), "last_limb=>mu row {row}");
+        let c = MulZeroConstraint {
+            a: cols::LAST_LIMB,
+            b: cols::OFFSET,
+            b_complement: false,
+            constraint_idx: 0,
+        };
+        assert_eq!(c.evaluate(&view), FE::zero(), "last_limb=>offset row {row}");
+    }
+}
+
+#[test]
+fn last_limb_set_only_at_offset_zero() {
+    let k = [7u8; 32];
+    let ops = rows_for_scalar(4, 0x100, &k);
+    assert_eq!(ops.len(), 32);
+    for op in &ops {
+        assert_eq!(op.last_limb, op.offset == 0);
+    }
+    // rows are expected to run from offset 31 down to 0; only the two endpoints are asserted here
+    assert_eq!(ops[0].offset, 31);
+    assert_eq!(ops[31].offset, 0);
+}
+
+#[test]
+fn create_constraints_count() {
+    let (constraints, next) = create_constraints(0);
+    assert_eq!(constraints.len(), 20);
+    assert_eq!(next, 20);
+}
diff --git a/prover/src/tests/ecdas_tests.rs b/prover/src/tests/ecdas_tests.rs
new file mode 100644
index 000000000..4e6a95bee
--- /dev/null
+++ b/prover/src/tests/ecdas_tests.rs
@@ -0,0 +1,133 @@
+//! Tests for the ECDAS double/add table — the `R_BYTES` offset constant, constraint
+//! satisfaction on generated traces across many scalars, and the constraint count.
+
+use crate::constraints::templates::IsBitConstraint;
+use crate::tables::ecdas::{
+    ColIsZero, ConvCarry, EcdasOperation, MulZero, R_BYTES, Relation, cols, create_constraints,
+    generate_ecdas_trace,
+};
+use crate::tables::types::{FE, GoldilocksExtension, GoldilocksField};
+use ecsm::compute_witness;
+use stark::constraints::transition::TransitionConstraint;
+use stark::table::TableView;
+use stark::trace::TraceTable;
+
+fn gx_le() -> [u8; 32] {
+    let mut be = [
+        0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    be.reverse();
+    be
+}
+
+fn k_le(v: u64) -> [u8; 32] {
+    let mut k = [0u8; 32];
+    k[..8].copy_from_slice(&v.to_le_bytes());
+    k
+}
+
+fn ops_for(k: u64) -> Vec<EcdasOperation> {
+    let w = compute_witness(&k_le(k), &gx_le()).unwrap();
+    w.steps
+        .into_iter()
+        .map(|step| EcdasOperation {
+            timestamp: 444,
+            step,
+        })
+        .collect()
+}
+
+fn row_view(
+    trace: &TraceTable<GoldilocksField, GoldilocksExtension>,
+    row: usize,
+) -> TableView<GoldilocksField, GoldilocksExtension> {
+    let main: Vec<FE> = (0..cols::NUM_COLUMNS)
+        .map(|c| *trace.main_table.get(row, c))
+        .collect();
+    TableView::new(vec![main], vec![])
+}
+
+#[test]
+fn r_bytes_is_three_p() {
+    // 3·p as 33 little-endian bytes, cross-checked against the ecsm field modulus.
+    let p = ecsm::p();
+    let three_p = &p * 3u32;
+    let mut bytes = three_p.to_bytes_le();
+    bytes.resize(33, 0);
+    assert_eq!(&bytes[..], &R_BYTES[..]);
+}
+
+/// Every ECDAS constraint evaluates to zero on a generated trace across many scalars
+/// (which exercise both double and add steps), including padding rows.
+#[test]
+fn constraints_hold_on_generated_trace() {
+    for k in [2u64, 3, 5, 7, 0xFF, 0xABCD, 1_000_003] {
+        let ops = ops_for(k);
+        assert!(!ops.is_empty(), "k={k} should have steps");
+        let trace = generate_ecdas_trace(&ops);
+
+        for row in 0..trace.num_rows() {
+            let view = row_view(&trace, row);
+            assert_eq!(
+                IsBitConstraint::unconditional(cols::MU, 0).evaluate(&view),
+                FE::zero(),
+                "is_bit(mu) k={k} row {row}"
+            );
+            assert_eq!(
+                IsBitConstraint::unconditional(cols::NEXT_OP, 0).evaluate(&view),
+                FE::zero()
+            );
+            assert_eq!(
+                MulZero {
+                    a: cols::OP,
+                    b: cols::NEXT_OP,
+                    b_complement: false,
+                    constraint_idx: 0
+                }
+                .evaluate(&view),
+                FE::zero(),
+                "op·next_op k={k} row {row}"
+            );
+            assert_eq!(
+                MulZero {
+                    a: cols::NEXT_OP,
+                    b: cols::MU,
+                    b_complement: true,
+                    constraint_idx: 0
+                }
+                .evaluate(&view),
+                FE::zero()
+            );
+            for relation in [Relation::Lambda, Relation::Xr, Relation::Yr] {
+                for i in 0..64 {
+                    let v = ConvCarry {
+                        relation,
+                        i,
+                        constraint_idx: 0,
+                    }
+                    .evaluate(&view);
+                    assert_eq!(v, FE::zero(), "conv k={k} i={i} row {row}");
+                }
+            }
+            for c_base in [cols::C0, cols::C1, cols::C2] {
+                assert_eq!(
+                    ColIsZero {
+                        col: c_base + 63,
+                        constraint_idx: 0
+                    }
+                    .evaluate(&view),
+                    FE::zero()
+                );
+            }
+        }
+    }
+}
+
+#[test]
+fn create_constraints_count() {
+    let (constraints, next) = create_constraints(0);
+    assert_eq!(constraints.len(), 199);
+    assert_eq!(next, 199);
+}
diff --git a/prover/src/tests/ecsm_tests.rs b/prover/src/tests/ecsm_tests.rs
new file mode 100644
index 000000000..bc92c4596
--- /dev/null
+++ b/prover/src/tests/ecsm_tests.rs
@@ -0,0 +1,194 @@
+//! Tests for the ECSM core table — constraint satisfaction on generated traces,
+//! constraint count, and the yG padding-closure argument.
+
+use crate::constraints::templates::IsBitConstraint;
+use crate::tables::ecsm::{
+    CarryBit, ColIsZero, ConvCarry, EcsmOperation, OverflowKind, OverflowRequired, Relation, cols,
+    create_constraints, generate_ecsm_trace,
+};
+use crate::tables::types::{FE, GoldilocksExtension, GoldilocksField};
+use ecsm::{P_BYTES, compute_witness};
+use stark::constraints::transition::TransitionConstraint;
+use stark::table::TableView;
+use stark::trace::TraceTable;
+
+fn gx_le() -> [u8; 32] {
+    // secp256k1 Gx: the literal is big-endian; `reverse` turns it into the little-endian return value.
+    let mut be = [
+        0x79, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    be.reverse();
+    be
+}
+
+fn k_le(v: u64) -> [u8; 32] {
+    let mut k = [0u8; 32];
+    k[..8].copy_from_slice(&v.to_le_bytes());
+    k
+}
+
+fn op_for(k: u64) -> EcsmOperation {
+    let witness = compute_witness(&k_le(k), &gx_le()).unwrap();
+    EcsmOperation {
+        timestamp: 444,
+        addr_xg: 0x2000,
+        addr_k: 0x3000,
+        addr_xr: 0x1000,
+        witness,
+    }
+}
+
+fn row_view(
+    trace: &TraceTable<GoldilocksField, GoldilocksExtension>,
+    row: usize,
+) -> TableView<GoldilocksField, GoldilocksExtension> {
+    let main: Vec<FE> = (0..cols::NUM_COLUMNS)
+        .map(|c| *trace.main_table.get(row, c))
+        .collect();
+    TableView::new(vec![main], vec![])
+}
+
+/// The ECSM constraints mirrored below evaluate to zero on every row of a generated trace (real + padding rows).
+#[test]
+fn constraints_hold_on_generated_trace() {
+    let ops: Vec<EcsmOperation> = [1u64, 2, 5, 0xFFFF, 1_000_003]
+        .iter()
+        .map(|&k| op_for(k))
+        .collect();
+    let trace = generate_ecsm_trace(&ops);
+
+    for row in 0..trace.num_rows() {
+        let view = row_view(&trace, row);
+        // Mirror of create_constraints. NOTE(review): 147 checks per row here vs 148 constraints counted — confirm the gap.
+        assert_eq!(
+            IsBitConstraint::unconditional(cols::MU, 0).evaluate(&view),
+            FE::zero(),
+            "is_bit(mu) row {row}"
+        );
+        for i in 0..64 {
+            for relation in [Relation::X2, Relation::Yg] {
+                let v = ConvCarry {
+                    relation,
+                    i,
+                    constraint_idx: 0,
+                }
+                .evaluate(&view);
+                assert_eq!(v, FE::zero(), "conv carry i={i} row {row}");
+            }
+        }
+        assert_eq!(
+            ColIsZero {
+                col: cols::c0(63),
+                constraint_idx: 0
+            }
+            .evaluate(&view),
+            FE::zero()
+        );
+        assert_eq!(
+            ColIsZero {
+                col: cols::c1(63),
+                constraint_idx: 0
+            }
+            .evaluate(&view),
+            FE::zero()
+        );
+        for kind in [OverflowKind::KLtN, OverflowKind::XrLtP] {
+            for i in 0..7 {
+                assert_eq!(
+                    CarryBit {
+                        kind,
+                        i,
+                        constraint_idx: 0
+                    }
+                    .evaluate(&view),
+                    FE::zero(),
+                    "carry bit kind i={i} row {row}"
+                );
+            }
+            assert_eq!(
+                OverflowRequired {
+                    kind,
+                    constraint_idx: 0
+                }
+                .evaluate(&view),
+                FE::zero(),
+                "overflow required row {row}"
+            );
+        }
+    }
+}
+
+#[test]
+fn create_constraints_count() {
+    let (constraints, next) = create_constraints(0);
+    assert_eq!(constraints.len(), 148);
+    assert_eq!(next, 148);
+}
+
+/// The yG carry recurrence is unsatisfiable on a padding row unless two ingredients hold,
+/// and this test locks both:
+///   (a) `q1` pads to `p`, so the `p² − q1·p` offset cancels;
+///   (b) the curve constant `b` is multiplied by `µ`, so it drops when `µ = 0`.
+/// Removing either ingredient leaves a nonzero residual on the yG limb-0 relation.
+/// The x² relation has no standalone constant, so it closes on all-zero padding and is
+/// left fully unconditional.
+#[test]
+fn yg_padding_closes_via_q1_eq_p_and_mu_gated_b() {
+    // yG limb-0 ConvCarry residual on a one-off row with the given `µ` and `q1`.
+    let yg_residual = |mu: u64, q1_is_p: bool| {
+        let mut main = vec![FE::zero(); cols::NUM_COLUMNS];
+        main[cols::MU] = FE::from(mu);
+        if q1_is_p {
+            for (i, &b) in P_BYTES.iter().enumerate() {
+                main[cols::Q1 + i] = FE::from(b as u64);
+            }
+        }
+        let view: TableView<GoldilocksField, GoldilocksExtension> =
+            TableView::new(vec![main], vec![]);
+        ConvCarry {
+            relation: Relation::Yg,
+            i: 0,
+            constraint_idx: 0,
+        }
+        .evaluate(&view)
+    };
+
+    // The padding row this chip emits (µ = 0, q1 = p): both ingredients present → closes.
+    assert_eq!(
+        yg_residual(0, true),
+        FE::zero(),
+        "padding row (µ=0, q1=p) must close"
+    );
+
+    // Drop ingredient (a): q1 = 0 instead of p → the p² offset is uncancelled.
+    assert_eq!(
+        yg_residual(0, false),
+        FE::zero() - FE::from(2209u64),
+        "without q1=p the residual is −P_0² = −47²"
+    );
+
+    // Drop ingredient (b): force the row active (µ = 1) so the curve constant `b`
+    // survives even with q1 = p. Residual = b = 7.
+    assert_eq!(
+        yg_residual(1, true),
+        FE::from(7u64),
+        "with µ=1 (b ungated) the leftover residual is the curve constant b=7"
+    );
+
+    // x² has no standalone constant → closes on an all-zero padding row regardless.
+    let mut zero = vec![FE::zero(); cols::NUM_COLUMNS];
+    zero[cols::MU] = FE::zero();
+    let zview: TableView<GoldilocksField, GoldilocksExtension> = TableView::new(vec![zero], vec![]);
+    assert_eq!(
+        ConvCarry {
+            relation: Relation::X2,
+            i: 0,
+            constraint_idx: 0,
+        }
+        .evaluate(&zview),
+        FE::zero(),
+        "x² closes on all-zero padding (no standalone constant)"
+    );
+}
diff --git a/prover/src/tests/mod.rs b/prover/src/tests/mod.rs
index 54705f401..af1ee316f 100644
--- a/prover/src/tests/mod.rs
+++ b/prover/src/tests/mod.rs
@@ -31,6 +31,12 @@ pub mod disk_spill_tests;
 #[cfg(test)]
 pub mod dvrm_tests;
 #[cfg(test)]
+pub mod ec_scalar_tests;
+#[cfg(test)]
+pub mod ecdas_tests;
+#[cfg(test)]
+pub mod ecsm_tests;
+#[cfg(test)]
 pub mod eq_tests;
 #[cfg(test)]
 pub mod keccak_rnd_tests;
diff --git a/prover/src/tests/prove_elfs_tests.rs b/prover/src/tests/prove_elfs_tests.rs
index 4924a0943..dd8780d74 100644
--- a/prover/src/tests/prove_elfs_tests.rs
+++ b/prover/src/tests/prove_elfs_tests.rs
@@ -1075,6 +1075,177 @@ fn test_prove_elfs_keccak_multi_call() {
     );
 }
 
+#[test]
+fn test_prove_elfs_ecsm() {
+    let _ = env_logger::builder().is_test(true).try_init();
+
+    let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm");
+    let elf = Elf::load(&elf_bytes).expect("Failed to load ELF");
+    let executor =
+        executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor");
+    let result = executor.run().expect("Failed to run program");
+
+    // The guest computes 5·G and commits the 32-byte x-coordinate; cross-check it against the
+    // reference scalar multiplication. Gx is written big-endian, then reversed to little-endian:
+    let mut gx = [
+        0x79u8, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    gx.reverse();
+    let mut k = [0u8; 32];
+    k[0] = 5;
+    let expected_xr = ecsm::scalar_mul_x(&k, &gx).unwrap();
+    assert_eq!(
+        result.return_values.memory_values,
+        expected_xr.to_vec(),
+        "committed xR must equal x(5G)"
+    );
+
+    let mut traces =
+        Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap();
+    assert!(
+        prove_and_verify_vm_minimal(&elf, &mut traces),
+        "ECSM prove/verify failed"
+    );
+}
+
+#[test]
+fn test_prove_elfs_ecsm_multi() {
+    let _ = env_logger::builder().is_test(true).try_init();
+
+    let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm_multi");
+    let elf = Elf::load(&elf_bytes).expect("Failed to load ELF");
+    let executor =
+        executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor");
+    let result = executor.run().expect("Failed to run program");
+
+    // Gx: big-endian literal, reversed below to little-endian.
+    let mut gx = [
+        0x79u8, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    gx.reverse();
+
+    // The guest commits x(1·G) || x(5·G) || x(0xABCDEF·G); cross-check each 32-byte chunk.
+    // k=1 exercises the zero-ECDAS-steps edge; 0xABCDEF exercises many doubles + adds.
+    let mut expected = Vec::new();
+    for kv in [1u64, 5, 0xABCDEF] {
+        let mut k = [0u8; 32];
+        k[..8].copy_from_slice(&kv.to_le_bytes());
+        expected.extend_from_slice(&ecsm::scalar_mul_x(&k, &gx).unwrap());
+    }
+    assert_eq!(
+        result.return_values.memory_values, expected,
+        "committed outputs must equal x(1G) || x(5G) || x(0xABCDEF·G)"
+    );
+
+    let mut traces =
+        Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap();
+    assert!(
+        prove_and_verify_vm_minimal(&elf, &mut traces),
+        "ECSM multi-call prove/verify failed"
+    );
+}
+
+/// End-to-end via the **Rust-guest path**: the `syscalls::ecsm_mul` wrapper computes 5·G and
+/// commits its x-coordinate. Verifies the wrapper works end-to-end (parity with the asm guest).
+#[test]
+fn test_prove_ecsm_rust_guest() {
+    let _ = env_logger::builder().is_test(true).try_init();
+
+    let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("workspace root")
+        .to_path_buf();
+    let elf_bytes = std::fs::read(workspace_root.join("executor/program_artifacts/rust/ecsm.elf"))
+        .expect("ecsm.elf not found — run `make compile-programs-rust`");
+
+    let proof = prove_vm_minimal(&elf_bytes, &[], &Default::default());
+    assert!(
+        verify_vm_minimal(&proof, &elf_bytes),
+        "ecsm rust guest should verify"
+    );
+
+    // Committed output must equal x(5·G).
+    let mut gx = [
+        0x79u8, 0xBE, 0x66, 0x7E, 0xF9, 0xDC, 0xBB, 0xAC, 0x55, 0xA0, 0x62, 0x95, 0xCE, 0x87, 0x0B,
+        0x07, 0x02, 0x9B, 0xFC, 0xDB, 0x2D, 0xCE, 0x28, 0xD9, 0x59, 0xF2, 0x81, 0x5B, 0x16, 0xF8,
+        0x17, 0x98,
+    ];
+    gx.reverse();
+    let mut k = [0u8; 32];
+    k[0] = 5;
+    assert_eq!(
+        proof.public_output,
+        ecsm::scalar_mul_x(&k, &gx).unwrap().to_vec()
+    );
+}
+
+/// Soundness: the verifier REJECTS a forged ECSM result.
+///
+/// A malicious prover must not be able to claim a wrong `k·G`. We tamper the result
+/// x-coordinate `xR` in the ECSM trace (to a different valid byte). `xR` is bound by the
+/// final ECDAS-bus tuple (the constrained double-and-add output) and by the `xR < p`
+/// carry-chain check, so the forgery unbalances the buses / breaks the constraints and the
+/// proof must fail to verify.
+#[test]
+fn test_prove_elfs_ecsm_forged_result_rejected() {
+    use crate::tables::ecsm::cols as ecsm_cols;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+
+    let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm");
+    let elf = Elf::load(&elf_bytes).expect("Failed to load ELF");
+    let executor =
+        executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor");
+    let result = executor.run().expect("Failed to run program");
+    let mut traces =
+        Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap();
+
+    // Forge the low byte of xR on the (single) real ECSM row.
+    let orig = *traces.ecsm.main_table.get(0, ecsm_cols::xr(0));
+    let forged = orig + FieldElement::<GoldilocksField>::one();
+    traces.ecsm.main_table.set(0, ecsm_cols::xr(0), forged);
+
+    assert!(
+        !prove_and_verify_vm_minimal(&elf, &mut traces),
+        "Verifier must reject a forged ECSM result xR"
+    );
+}
+
+/// Verifies SPEC-1 (a spec bug) + deviation D1: `ecdas.toml` omits `IS_BIT(µ)`, but `µ` is the
+/// multiplicity of every ECDAS bus interaction. The implementation adds `IS_BIT(µ)`; this test
+/// confirms it is load-bearing by forging a non-boolean `µ` on a real ECDAS row and asserting
+/// the verifier rejects. (k=5 produces 3 ECDAS rows.)
+#[test]
+fn test_prove_elfs_ecsm_forged_ecdas_mu_rejected() {
+    use crate::tables::ecdas::cols as ecdas_cols;
+
+    let _ = env_logger::builder().is_test(true).try_init();
+
+    let elf_bytes = crate::test_utils::asm_elf_bytes("test_ecsm");
+    let elf = Elf::load(&elf_bytes).expect("Failed to load ELF");
+    let executor =
+        executor::vm::execution::Executor::new(&elf, vec![]).expect("Failed to create executor");
+    let result = executor.run().expect("Failed to run program");
+    let mut traces =
+        Traces::from_elf_and_logs_minimal(&elf, &result.logs, &Default::default(), &[]).unwrap();
+
+    // Row 0 is a real ECDAS step (µ=1); forge µ to a non-boolean value.
+    traces.ecdas.main_table.set(
+        0,
+        ecdas_cols::MU,
+        FieldElement::<GoldilocksField>::from(2u64),
+    );
+
+    assert!(
+        !prove_and_verify_vm_minimal(&elf, &mut traces),
+        "Verifier must reject a non-boolean ECDAS multiplicity (IS_BIT(µ), spec omits it)"
+    );
+}
+
 /// Verifier REJECTS a forged trace where an addr byte cell is set to a
 /// non-byte field element.
 ///
@@ -2260,7 +2431,7 @@ fn test_crafted_zero_count_proof_must_not_verify() {
     let airs = VmAirs::new(&elf, &proof_options, true, &[], &zero_counts, None, None);
 
     let verifier_air_refs = airs.air_refs();
-    assert_eq!(verifier_air_refs.len(), 8);
+    assert_eq!(verifier_air_refs.len(), crate::FIXED_TABLE_COUNT);
 
     let mut bitwise_trace = crate::tables::bitwise::generate_bitwise_trace();
 
diff --git a/syscalls/README.md b/syscalls/README.md
index fa5758741..9e972e0d0 100644
--- a/syscalls/README.md
+++ b/syscalls/README.md
@@ -12,6 +12,7 @@ Published as `lambda-vm-syscalls`. Intended to be used from RISC-V (RV64IM) gues
 | `get_private_input() -> Vec<u8>` | Read the host-supplied private input bytes (memory-mapped at `0xFF000000`). |
 | `sys_halt() -> !` | Terminate execution cleanly. Called automatically after `main` by the default entry point. |
 | `keccak_permute(state: &mut [u64; 25])` | Keccak-f[1600] permutation precompile. |
+| `ecsm_mul(xr: &mut [u8; 32], xg: &[u8; 32], k: &[u8; 32])` | secp256k1 scalar multiplication: writes `xR = (k·G)_x` to `xr`, where `xg` is the x-coordinate of `G` (all values 32-byte little-endian; `0 < k < N`). |
 
 The crate also provides a default `_start` that initialises the allocator, calls `main`, and halts.
 
diff --git a/syscalls/src/syscalls.rs b/syscalls/src/syscalls.rs
index e4f1d9d65..52246e465 100644
--- a/syscalls/src/syscalls.rs
+++ b/syscalls/src/syscalls.rs
@@ -20,6 +20,10 @@ pub enum SyscallNumbers {
 #[cfg(target_arch = "riscv64")]
 const KECCAK_SYSCALL_NUMBER: usize = usize::MAX - 1;
 
+/// Syscall number for the ECSM secp256k1 scalar-multiply accelerator (u64::MAX - 2, = -3).
+#[cfg(target_arch = "riscv64")]
+const ECSM_SYSCALL_NUMBER: usize = usize::MAX - 2;
+
 /// No-op. The `Print` ecall (a7=1) has no receiver on the Ecall bus, so emitting
 /// it makes the LogUp bus unbalance and the proof fail to verify. Printing isn't
 /// needed in provable programs, so `print_string` does nothing on every target.
@@ -130,6 +134,27 @@ pub fn keccak_permute(_state: &mut [u64; 25]) {
     unimplemented!("syscalls are only implemented for riscv64 targets");
 }
 
+#[cfg(target_arch = "riscv64")]
+/// Compute `xR = (k·G)_x` on secp256k1 via the ECSM accelerator, `xG` being the x-coordinate of `G`. All values are
+/// 32-byte little-endian. Requires `0 < k < N` and `xG` a valid curve x-coordinate. (The ecall may alias `xR`/`xG`; these borrows cannot.)
+pub fn ecsm_mul(xr: &mut [u8; 32], xg: &[u8; 32], k: &[u8; 32]) {
+    unsafe {
+        asm!(
+            "ecall",
+            in("a0") xr.as_mut_ptr(), // x10 = address to write xR
+            in("a1") xg.as_ptr(),     // x11 = address of xG
+            in("a2") k.as_ptr(),      // x12 = address of k
+            in("a7") ECSM_SYSCALL_NUMBER,
+        )
+    }
+}
+
+#[cfg(not(target_arch = "riscv64"))]
+/// Non-riscv64 stand-in for `ecsm_mul` (`xR = (k·G)_x`, 32-byte little-endian values): always panics.
+pub fn ecsm_mul(_xr: &mut [u8; 32], _xg: &[u8; 32], _k: &[u8; 32]) {
+    unimplemented!("syscalls are only implemented for riscv64 targets");
+}
+
 // =============================================================================
 // Stub implementations for unsupported std functions
 // These functions are required by Rust's std zkvm module but are not supported