diff --git a/.gitignore b/.gitignore
index 9847c84..63d7e8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,54 @@
+*
+
+### Allowed files and directories ###
+
+!.gitignore
+!.github/
+!.github/workflows/
+!.github/workflows/*
+!.github/dependabot.yml
+
+!LICENSE.md
+!README.md
+!Project.toml
+
+!src/
+!src/*.jl
+!test/
+!test/*.jl
+!docs/
+!docs/src/
+!docs/src/*.md
+!docs/make.jl
+!docs/Project.toml
+
+### Denied even if allowed above ###
+
+# Files generated by invoking Julia with --code-coverage
 *.jl.cov
 *.jl.*.cov
+
+# Files generated by invoking Julia with --track-allocation
 *.jl.mem
-Manifest.toml
-docs/build
-docs/site
-docs/Manifest.toml
+
+# System-specific files and directories generated by the BinaryProvider and BinDeps packages
+# They contain absolute paths specific to the host computer, and so should not be committed
+deps/deps.jl
+deps/build.log
+deps/downloads/
+deps/usr/
+deps/src/
+
+# Build artifacts for creating documentation generated by the Documenter package
+docs/build/
+docs/site/
+
+# File generated by Pkg, the package manager, based on a corresponding Project.toml
+# It records a fixed state of all packages used by the project. As such, it should not be
+# committed for packages, but should be committed for applications that require a static
+# environment.
+Manifest*.toml
+
+# File generated by the Preferences package to store local preferences
+LocalPreferences.toml
+JuliaLocalPreferences.toml
\ No newline at end of file
diff --git a/src/Primes.jl b/src/Primes.jl
index b95fdac..772fccd 100644
--- a/src/Primes.jl
+++ b/src/Primes.jl
@@ -12,6 +12,8 @@ export isprime, primes, primesmask, factor, eachfactor, divisors, ismersenneprim
        nextprime, nextprimes, prevprime, prevprimes, prime, prodfactors, radical, totient
 
 include("factorization.jl")
+include("ecm.jl")
+include("mpqs.jl")
 
 # Primes generating functions
 #     https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
@@ -340,6 +342,53 @@ allocating the storage required for `factor(n)` can introduce significant overhe
 """
 eachfactor(n::Integer) = FactorIterator(n)
 
+# Polyalgorithm dispatch for large integer factorization
+# Ref: Cohen (1993) "A Course in Computational Algebraic Number Theory", §1.7
+
+"""
+    _find_factor(n::T) -> T
+
+Find a non-trivial factor of composite `n` using a polyalgorithm:
+1. Perfect power check (via IntegerMathUtils.ispower/iroot)
+2. ECM (Elliptic Curve Method)
+3. MPQS (Multiple Polynomial Quadratic Sieve)
+"""
+function _find_factor(n::T)::T where {T<:Integer}
+    # 1. Perfect power check using IntegerMathUtils (GMP-backed)
+    if ispower(n)
+        d = find_exponent(n)
+        r = iroot(n, d)
+        return T(r)
+    end
+
+    # Convert to BigInt for ECM/MPQS
+    nb = BigInt(n)
+    bits = ndigits(nb, base=2)
+
+    # 2. Progressive ECM with increasing B1 bounds
+    # Thresholds converted from base-10:
+    # 192 bits ≈ 58 digits | 128 bits ≈ 38 digits
+    ecm_schedule = if bits >= 192
+        # For large numbers, brief ECM then fall through to MPQS
+        [(B1=2000, curves=10), (B1=11000, curves=20)]
+    elseif bits >= 128
+        [(B1=2000, curves=25), (B1=11000, curves=90), (B1=50000, curves=200)]
+    else
+        [(B1=2000, curves=25), (B1=11000, curves=90)]
+    end
+
+    for (B1, curves) in ecm_schedule
+        result = ecm_factor(nb, B1, curves)
+        if result !== nothing
+            return T(result)
+        end
+    end
+
+    # 3. MPQS fallback
+    result = mpqs_factor(nb)
+    return T(result)
+end
+
 # state[1] is the current number to factor (this decreases when factors are found)
 # state[2] is the prime to start trial division with.
 function iterate(f::FactorIterator{T}, state=(f.n, T(3))) where T
@@ -392,8 +441,13 @@ function iterate(f::FactorIterator{T}, state=(f.n, T(3))) where T
     if n <= 2^32 || isprime(n)
         return (n, 1), (T(1), n)
     end
-    should_widen = T <: BigInt || widemul(n - 1, n - 1) ≤ typemax(n)
-    p = should_widen ? pollardfactor(n) : pollardfactor(widen(n))
+    # For large cofactors, use polyalgorithm dispatch (ECM → MPQS)
+    if n > big"100000000000000000000"  # > 10^20
+        p = _find_factor(n)
+    else
+        should_widen = T <: BigInt || widemul(n - 1, n - 1) ≤ typemax(n)
+        p = should_widen ? pollardfactor(n) : pollardfactor(widen(n))
+    end
     num_p = 0
     while true
         q, r = divrem(n, p)
diff --git a/src/ecm.jl b/src/ecm.jl
new file mode 100644
index 0000000..7388f6c
--- /dev/null
+++ b/src/ecm.jl
@@ -0,0 +1,233 @@
+# Elliptic Curve Method (ECM) for integer factorization
+# Ref: Lenstra (1987) "Factoring integers with elliptic curves"
+# Ref: Montgomery (1987) "Speeding the Pollard and Elliptic Curve Methods of Factorization"
+
+"""
+Point on a Montgomery curve in projective coordinates (X:Z).
+The point at infinity is represented by Z == 0.
+"""
+struct MontgomeryCurvePoint
+    X::BigInt
+    Z::BigInt
+end
+
+"""
+In-place modular reduction: sets r = n mod d (non-negative remainder).
+"""
+function _mpz_fdiv_r!(r::BigInt, n::BigInt, d::BigInt)
+    ccall((:__gmpz_fdiv_r, :libgmp), Cvoid, (Ref{BigInt}, Ref{BigInt}, Ref{BigInt}), r, n, d)
+end
+
+"""
+Preallocated scratch space for ECM point arithmetic.
+Avoids BigInt allocation in the hot Montgomery ladder loop.
+t1-t6: scratch for add!/double!; R0/R1/tmp: scratch for scalar_mul!
+"""
+struct ECMBuffers
+    t1::BigInt
+    t2::BigInt
+    t3::BigInt
+    t4::BigInt
+    t5::BigInt
+    t6::BigInt
+    R0_X::BigInt
+    R0_Z::BigInt
+    R1_X::BigInt
+    R1_Z::BigInt
+    tmp_X::BigInt
+    tmp_Z::BigInt
+end
+
+ECMBuffers() = ECMBuffers(BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(),
+                          BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt())
+
+"""
+In-place: mulmod!(dst, a, b, n, tmp) sets dst = (a * b) mod n using tmp as scratch.
+"""
+@inline function _mulmod!(dst::BigInt, a::BigInt, b::BigInt, n::BigInt, tmp::BigInt)
+    Base.GMP.MPZ.mul!(tmp, a, b)
+    _mpz_fdiv_r!(dst, tmp, n)
+end
+
+"""
+Differential addition on Montgomery curve: given P, Q and P-Q, compute P+Q.
+Uses projective coordinates and in-place arithmetic to avoid allocations.
+"""
+function _ecm_add!(res_X::BigInt, res_Z::BigInt,
+                   P_X::BigInt, P_Z::BigInt, Q_X::BigInt, Q_Z::BigInt,
+                   diff_X::BigInt, diff_Z::BigInt, n::BigInt, buf::ECMBuffers)
+    t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6
+    # u = (P.X - P.Z) * (Q.X + Q.Z) mod n
+    Base.GMP.MPZ.sub!(t1, P_X, P_Z)         # t1 = P.X - P.Z
+    Base.GMP.MPZ.add!(t2, Q_X, Q_Z)         # t2 = Q.X + Q.Z
+    _mulmod!(t5, t1, t2, n, t3)             # t5 = u
+
+    # v = (P.X + P.Z) * (Q.X - Q.Z) mod n
+    Base.GMP.MPZ.add!(t1, P_X, P_Z)         # t1 = P.X + P.Z
+    Base.GMP.MPZ.sub!(t2, Q_X, Q_Z)         # t2 = Q.X - Q.Z
+    _mulmod!(t6, t1, t2, n, t3)             # t6 = v
+
+    # add = u + v, sub = u - v
+    Base.GMP.MPZ.add!(t1, t5, t6)           # t1 = add = u + v
+    Base.GMP.MPZ.sub!(t2, t5, t6)           # t2 = sub = u - v
+
+    # X = diff.Z * add^2 mod n
+    _mulmod!(t3, t1, t1, n, t4)             # t3 = add^2 mod n
+    _mulmod!(res_X, diff_Z, t3, n, t4)      # res_X = diff.Z * add^2 mod n
+
+    # Z = diff.X * sub^2 mod n
+    _mulmod!(t3, t2, t2, n, t4)             # t3 = sub^2 mod n
+    _mulmod!(res_Z, diff_X, t3, n, t4)      # res_Z = diff.X * sub^2 mod n
+end
+
+"""
+In-place point doubling on Montgomery curve with parameter a24 = (a+2)/4.
+"""
+function _ecm_double!(res_X::BigInt, res_Z::BigInt,
+                      P_X::BigInt, P_Z::BigInt,
+                      n::BigInt, a24::BigInt, buf::ECMBuffers)
+    t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6
+    # u = (P.X + P.Z)^2 mod n
+    Base.GMP.MPZ.add!(t1, P_X, P_Z)         # t1 = P.X + P.Z
+    _mulmod!(t5, t1, t1, n, t3)             # t5 = u = (P.X+P.Z)^2 mod n
+
+    # v = (P.X - P.Z)^2 mod n
+    Base.GMP.MPZ.sub!(t1, P_X, P_Z)         # t1 = P.X - P.Z
+    _mulmod!(t6, t1, t1, n, t3)             # t6 = v = (P.X-P.Z)^2 mod n
+
+    # diff = u - v
+    Base.GMP.MPZ.sub!(t1, t5, t6)           # t1 = diff = u - v
+
+    # X = u * v mod n
+    _mulmod!(res_X, t5, t6, n, t3)          # res_X = u * v mod n
+
+    # Z = diff * (v + a24 * diff) mod n
+    _mulmod!(t2, a24, t1, n, t3)            # t2 = a24 * diff mod n
+    Base.GMP.MPZ.add!(t2, t6)               # t2 = v + a24 * diff
+    _mulmod!(res_Z, t1, t2, n, t3)          # res_Z = diff * (v + a24*diff) mod n
+end
+
+"""
+Montgomery ladder scalar multiplication: compute [k]P on Montgomery curve.
+Uses preallocated buffers to avoid allocation in the inner loop.
+Returns the point [k]P as (res_X, res_Z).
+"""
+function _ecm_scalar_mul!(res_X::BigInt, res_Z::BigInt,
+                          k::BigInt, P_X::BigInt, P_Z::BigInt,
+                          n::BigInt, a24::BigInt, buf::ECMBuffers)
+    R0_X, R0_Z = buf.R0_X, buf.R0_Z
+    R1_X, R1_Z = buf.R1_X, buf.R1_Z
+    tmp_X, tmp_Z = buf.tmp_X, buf.tmp_Z
+
+    # R0 = P, R1 = 2P
+    Base.GMP.MPZ.set!(R0_X, P_X)
+    Base.GMP.MPZ.set!(R0_Z, P_Z)
+    _ecm_double!(R1_X, R1_Z, P_X, P_Z, n, a24, buf)
+
+    bits = ndigits(k, base=2)
+    for i in (bits - 2):-1:0
+        if isodd(k >> i)
+            _ecm_add!(tmp_X, tmp_Z, R0_X, R0_Z, R1_X, R1_Z, P_X, P_Z, n, buf)
+            Base.GMP.MPZ.set!(R0_X, tmp_X)
+            Base.GMP.MPZ.set!(R0_Z, tmp_Z)
+            _ecm_double!(tmp_X, tmp_Z, R1_X, R1_Z, n, a24, buf)
+            Base.GMP.MPZ.set!(R1_X, tmp_X)
+            Base.GMP.MPZ.set!(R1_Z, tmp_Z)
+        else
+            _ecm_add!(tmp_X, tmp_Z, R0_X, R0_Z, R1_X, R1_Z, P_X, P_Z, n, buf)
+            Base.GMP.MPZ.set!(R1_X, tmp_X)
+            Base.GMP.MPZ.set!(R1_Z, tmp_Z)
+            _ecm_double!(tmp_X, tmp_Z, R0_X, R0_Z, n, a24, buf)
+            Base.GMP.MPZ.set!(R0_X, tmp_X)
+            Base.GMP.MPZ.set!(R0_Z, tmp_Z)
+        end
+    end
+    Base.GMP.MPZ.set!(res_X, R0_X)
+    Base.GMP.MPZ.set!(res_Z, R0_Z)
+end
+
+"""
+    ecm_factor(n::BigInt, B1::Int, num_curves::Int) -> Union{BigInt, Nothing}
+
+Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method.
+Computes [m]P where m = lcm(1..B1) = prod(p^floor(log_p(B1)) for p prime ≤ B1).
+Uses batched gcd (accumulate Z coordinates, check periodically) to reduce gcd calls.
+Returns a factor or `nothing` if none found within the curve budget.
+"""
+function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing}
+    # Precompute prime powers for Stage 1
+    prime_powers = BigInt[]
+    for p in primes(B1)
+        pk = BigInt(p)
+        while pk * p <= B1
+            pk *= p
+        end
+        push!(prime_powers, pk)
+    end
+
+    buf = ECMBuffers()
+    Q_X = BigInt()
+    Q_Z = BigInt()
+    tmp_mul = BigInt()  # scratch for acc * Q.Z
+
+    for _ in 1:num_curves
+        # Generate random curve via σ parameter (Suyama's parametrization)
+        σ = BigInt(rand(6:10^9))
+        u = mod(σ * σ - 5, n)
+        v = mod(4 * σ, n)
+        x0 = mod(u * u * u, n)
+        z0 = mod(v * v * v, n)
+
+        vu_diff = mod(v - u, n)
+        a24_num = mod(vu_diff^3 * mod(3 * u + v, n), n)
+        a24_den = mod(16 * x0 * v, n)
+
+        g = gcd(a24_den, n)
+        if g > 1 && g < n
+            return g
+        end
+        if g == n
+            continue
+        end
+
+        a24_den_inv = invmod(a24_den, n)
+        a24 = mod(a24_num * a24_den_inv, n)
+
+        Base.GMP.MPZ.set!(Q_X, x0)
+        Base.GMP.MPZ.set!(Q_Z, z0)
+
+        # Stage 1: multiply Q by each prime power, with batched gcd
+        degenerate = false
+        acc = BigInt(1)
+        batch_count = 0
+        for pk in prime_powers
+            _ecm_scalar_mul!(Q_X, Q_Z, pk, Q_X, Q_Z, n, a24, buf)
+            Base.GMP.MPZ.mul!(tmp_mul, acc, Q_Z)
+            _mpz_fdiv_r!(acc, tmp_mul, n)
+            batch_count += 1
+
+            if batch_count >= 100
+                g = gcd(acc, n)
+                if g > 1 && g < n
+                    return g
+                end
+                if g == n
+                    degenerate = true
+                    break
+                end
+                Base.GMP.MPZ.set_si!(acc, 1)
+                batch_count = 0
+            end
+        end
+
+        degenerate && continue
+
+        if batch_count > 0
+            g = gcd(acc, n)
+            if g > 1 && g < n
+                return g
+            end
+        end
+    end
+    return nothing
+end
diff --git a/src/mpqs.jl b/src/mpqs.jl
new file mode 100644
index 0000000..1fd3080
--- /dev/null
+++ b/src/mpqs.jl
@@ -0,0 +1,1147 @@
+# Multiple Polynomial Quadratic Sieve (MPQS) for integer factorization
+# Ref: Silverman (1987) "The Multiple Polynomial Quadratic Sieve"
+# Ref: Pomerance (1982) "Analysis and Comparison of Some Integer Factoring Algorithms"
+# Ref: Knuth & Trabb Pardo (1976) "Analysis of a Simple Factorization Algorithm"
+# Ref: Crandall & Pomerance (2005) "Prime Numbers: A Computational Perspective", Ch.6
+
+"""
+Context for MPQS factorization.
+"""
+mutable struct MPQSContext
+    n::BigInt           # kn (multiplied)
+    n_orig::BigInt      # original n
+    k::Int              # Knuth multiplier
+    factor_base::Vector{Int}
+    fb_size::Int
+    sqrt_kn_mod::Vector{Int}
+    log_primes::Vector{UInt8}
+    sieve_interval::Int
+end
+
+"""
+A smooth or partially-smooth relation found during sieving.
+large_prime == 0: fully smooth relation
+large_prime > 0, large_prime2 == 0: single large prime partial
+large_prime > 0, large_prime2 > 0: double large prime partial (large_prime < large_prime2)
+"""
+struct SmoothRelation
+    ax_plus_b::BigInt    # ax + b
+    Q_val::BigInt        # Q(x) = (ax+b)² - kn
+    exponents::BitVector # parity of exponents over factor base
+    full_exp::Vector{Int32} # actual exponent counts of g(x) over factor base
+    a_indices::Vector{Int}  # FB indices of primes composing polynomial `a`
+    large_prime::Int     # first unfactored prime (0 if fully smooth)
+    large_prime2::Int    # second unfactored prime (0 if single LP or smooth)
+end
+
+"""
+Represents an MPQS polynomial Q(x) = (ax + b)² - kn.
+"""
+struct MPQSPolynomial
+    a::BigInt
+    b::BigInt
+    a_factors::Vector{Int}  # indices into factor base for primes composing a
+end
+
+# Precomputed table: digit count → (fb_size, sieve_interval)
+# Ref: Silverman (1987), Crandall & Pomerance (2005) Ch.6
+# Standard MPQS parameters: factor base sizes from L-smoothness formula.
+# Large prime variation significantly amplifies effective relation yield.
+# Ref: L-smoothness formula calibrated against reference implementations
+const _MPQS_PARAMS = [
+    (digits=30, fb_size=200,   sieve_interval=55000),
+    (digits=35, fb_size=400,   sieve_interval=55000),
+    (digits=40, fb_size=700,   sieve_interval=55000),
+    (digits=45, fb_size=1200,  sieve_interval=55000),
+    (digits=50, fb_size=1900,  sieve_interval=66000),
+    (digits=55, fb_size=2900,  sieve_interval=66000),
+    (digits=58, fb_size=3700,  sieve_interval=95000),
+    (digits=60, fb_size=5500,  sieve_interval=250000),
+    (digits=62, fb_size=6200,  sieve_interval=250000),
+    (digits=65, fb_size=7500,  sieve_interval=250000),
+    (digits=68, fb_size=9000,  sieve_interval=250000),
+    (digits=70, fb_size=10000, sieve_interval=250000),
+    (digits=73, fb_size=13000, sieve_interval=250000),
+    (digits=76, fb_size=16000, sieve_interval=250000),
+]
+
+"""
+Select MPQS parameters (factor base size, sieve interval) based on digit count.
+"""
+function _mpqs_select_params(n::BigInt)
+    d = ndigits(n)
+    # Find bracketing entries and interpolate
+    if d <= _MPQS_PARAMS[1].digits
+        return _MPQS_PARAMS[1].fb_size, _MPQS_PARAMS[1].sieve_interval
+    end
+    if d >= _MPQS_PARAMS[end].digits
+        return _MPQS_PARAMS[end].fb_size, _MPQS_PARAMS[end].sieve_interval
+    end
+    for i in 1:(length(_MPQS_PARAMS) - 1)
+        lo, hi = _MPQS_PARAMS[i], _MPQS_PARAMS[i + 1]
+        if lo.digits <= d <= hi.digits
+            t = (d - lo.digits) / (hi.digits - lo.digits)
+            fb = round(Int, lo.fb_size + t * (hi.fb_size - lo.fb_size))
+            si = round(Int, lo.sieve_interval + t * (hi.sieve_interval - lo.sieve_interval))
+            return fb, si
+        end
+    end
+    return _MPQS_PARAMS[end].fb_size, _MPQS_PARAMS[end].sieve_interval
+end
+
+"""
+Select optimal Knuth multiplier for MPQS.
+Scores k ∈ {1,3,5,...,47} using Silverman's formula.
+Ref: Silverman (1987) §4
+"""
+function _select_knuth_multiplier(n::BigInt)::Int
+    best_k = 1
+    best_score = -Inf
+    small_primes = primes(200)
+
+    for k in 1:2:47
+        kn = BigInt(k) * n
+        score = 0.0
+
+        # Special handling for p=2
+        kn_mod8 = Int(_mpz_fdiv_ui(kn, UInt(8)))
+        if kn_mod8 == 1
+            score += 2 * log(2.0)
+        elseif kn_mod8 == 5
+            score += log(2.0)
+        end
+
+        # Score odd primes
+        for p in small_primes
+            p == 2 && continue
+            if mod(k, p) == 0
+                score += log(Float64(p)) / p
+            elseif powermod(kn, div(p - 1, 2), p) == 1  # Legendre(kn, p) == 1
+                score += 2 * log(Float64(p)) / p
+            end
+        end
+
+        # Penalize large k slightly
+        score -= 0.5 * log(Float64(k))
+
+        if score > best_score
+            best_score = score
+            best_k = k
+        end
+    end
+    return best_k
+end
+
+"""
+Build factor base: primes p where Legendre(kn, p) == 1, plus p=2.
+Also computes sqrt(kn) mod p and log₂(p) for each prime.
+"""
+function _build_factor_base(kn::BigInt, fb_size_target::Int)
+    factor_base = Int[2]
+    sqrt_kn_mod = Int[mod(kn, 2) == 0 ? 0 : 1]
+
+    p = 3
+    while length(factor_base) < fb_size_target
+        if isprime(p) && powermod(kn, div(p - 1, 2), p) == 1
+            push!(factor_base, p)
+            # Compute sqrt(kn) mod p using Tonelli-Shanks (result < p, fits in Int)
+            push!(sqrt_kn_mod, Int(_tonelli_shanks(mod(kn, p), p)))
+        end
+        p += 2
+    end
+
+    log_primes = UInt8[floor(UInt8, log2(p)) for p in factor_base]
+
+    return factor_base, sqrt_kn_mod, log_primes
+end
+
+"""
+Tonelli-Shanks algorithm for computing square root mod p.
+Returns r such that r² ≡ n (mod p).
+"""
+function _tonelli_shanks(n::BigInt, p::Int)::BigInt
+    n = mod(n, p)
+    n == 0 && return BigInt(0)
+    p == 2 && return BigInt(n)
+
+    # Factor out powers of 2 from p-1
+    q = p - 1
+    s = 0
+    while iseven(q)
+        q >>= 1
+        s += 1
+    end
+
+    if s == 1
+        # p ≡ 3 (mod 4)
+        return powermod(BigInt(n), div(p + 1, 4), p)
+    end
+
+    # Find a non-residue z
+    z = BigInt(2)
+    while powermod(z, div(p - 1, 2), p) != p - 1
+        z += 1
+    end
+
+    M = s
+    c = powermod(z, q, p)
+    t = powermod(BigInt(n), q, p)
+    R = powermod(BigInt(n), div(q + 1, 2), p)
+
+    while true
+        t == 1 && return R
+        # Find least i such that t^(2^i) ≡ 1 (mod p)
+        i = 0
+        temp = t
+        while temp != 1
+            temp = mod(temp * temp, p)
+            i += 1
+        end
+        b = powermod(c, BigInt(1) << (M - i - 1), p)
+        M = i
+        c = mod(b * b, p)
+        t = mod(t * c, p)
+        R = mod(R * b, p)
+    end
+end
+
+"""
+Generate an MPQS polynomial Q(x) = (ax+b)² - kn.
+`a` is a product of factor base primes, chosen so that a ≈ √(2kn)/M.
+"""
+function _generate_polynomial(ctx::MPQSContext, used_a_sets::Set{Vector{Int}})::Union{MPQSPolynomial, Nothing}
+    fb = ctx.factor_base
+    fb_size = ctx.fb_size
+    kn = ctx.n
+    target_a = isqrt(2 * kn) ÷ ctx.sieve_interval
+
+    # Determine how many primes we need based on target_a and typical prime size
+    # Use primes from the upper portion of the factor base
+    # Start search from position fb_size/3 to fb_size (avoid very small primes)
+    lo = max(5, fb_size ÷ 3)
+    hi = fb_size
+
+    # Estimate number of factors needed: target_a / prod(typical_prime)
+    typical_log_p = log(Float64(fb[div(lo + hi, 2)]))
+    num_facs = max(2, round(Int, log(Float64(target_a)) / typical_log_p))
+    # Allow ±1 variation
+    num_facs_range = max(2, num_facs - 1):min(fb_size ÷ 2, num_facs + 1)
+
+    best_a = BigInt(0)
+    best_indices = Int[]
+    best_diff = BigInt(10)^200
+
+    for _ in 1:100
+        nf = rand(num_facs_range)
+        indices = sort(rand(lo:hi, nf))
+        length(unique(indices)) == nf || continue
+        indices = unique(indices)
+        sort!(indices)
+        length(indices) == nf || continue
+        indices in used_a_sets && continue
+
+        a = prod(BigInt(fb[i]) for i in indices)
+        diff = abs(a - target_a)
+        if diff < best_diff
+            best_diff = diff
+            best_a = a
+            best_indices = indices
+        end
+    end
+
+    isempty(best_indices) && return nothing
+    push!(used_a_sets, best_indices)
+
+    a = best_a
+    # Compute b using CRT: b² ≡ kn (mod a)
+    b = _compute_b(kn, a, best_indices, fb, ctx.sqrt_kn_mod)
+
+    return MPQSPolynomial(a, b, best_indices)
+end
+
+"""
+Compute b such that b² ≡ kn (mod a) using CRT from factor base roots.
+"""
+function _compute_b(kn::BigInt, a::BigInt, a_indices::Vector{Int},
+                    fb::Vector{Int}, sqrt_kn_mod::Vector{Int})::BigInt
+    # Use CRT to combine sqrt(kn) mod p_i for each prime in a
+    b = BigInt(0)
+    for idx in a_indices
+        p = BigInt(fb[idx])
+        r = sqrt_kn_mod[idx]
+        a_div_p = a ÷ p
+        # CRT: b += r * a/p * invmod(a/p, p) mod a
+        inv_a_p = invmod(mod(a_div_p, p), p)
+        b = mod(b + r * a_div_p * inv_a_p, a)
+    end
+    # Ensure b² ≡ kn (mod a); if not, try a-b
+    if mod(b * b, a) != mod(kn, a)
+        b = a - b
+    end
+    return b
+end
+
+
+"""
+In-place unsigned integer division: sets x = x ÷ d, returns remainder.
+Uses GMP's mpz_tdiv_q_ui for single-call quotient+remainder.
+"""
+function _tdiv_q_ui!(x::BigInt, d::UInt)::UInt
+    return ccall((:__gmpz_tdiv_q_ui, :libgmp), Culong,
+                 (Ref{BigInt}, Ref{BigInt}, Culong), x, x, d)
+end
+
+"""
+In-place absolute value: sets x = |x|.
+"""
+function _mpz_abs!(x::BigInt, src::BigInt)
+    ccall((:__gmpz_abs, :libgmp), Cvoid, (Ref{BigInt}, Ref{BigInt}), x, src)
+end
+
+"""
+Compute quotient into `q` and return remainder, without modifying `n`.
+Uses GMP's mpz_tdiv_q_ui: sets q = n ÷ d, returns n mod d.
+"""
+function _tdiv_q_ui_into!(q::BigInt, n::BigInt, d::UInt)::UInt
+    return ccall((:__gmpz_tdiv_q_ui, :libgmp), Culong,
+                 (Ref{BigInt}, Ref{BigInt}, Culong), q, n, d)
+end
+
+"""
+Compute n mod d without allocating a BigInt for the result.
+Returns the remainder as a Culong (UInt). Uses GMP's mpz_fdiv_ui.
+"""
+function _mpz_fdiv_ui(n::BigInt, d::Culong)::Culong
+    return ccall((:__gmpz_fdiv_ui, :libgmp), Culong, (Ref{BigInt}, Culong), n, d)
+end
+
+"""
+Root-guided trial factoring with preallocated buffers.
+`exponents` and `full_exp` are zeroed and filled in-place to avoid allocation.
+`remainder` is a preallocated BigInt used as scratch space.
+Returns a SmoothRelation (copying the buffers) or nothing.
+"""
+@inline function _trial_factor_guided(ax_b::BigInt, gx::BigInt, Qx::BigInt, ctx::MPQSContext,
+                               large_prime_bound::Int, dlp_bound::Int, dlp_bound_sq::Int,
+                               sieve_pos::Int,
+                               starts1::Vector{Int}, starts2::Vector{Int},
+                               a_indices::Vector{Int},
+                               exponents::BitVector, full_exp::Vector{Int32},
+                               remainder::BigInt, quotient_buf::BigInt)::Union{SmoothRelation, Nothing}
+    fb = ctx.factor_base
+    fb_size = ctx.fb_size
+
+    # Reset buffers
+    fill!(exponents, false)
+    fill!(full_exp, Int32(0))
+    _mpz_abs!(remainder, gx)
+
+    if gx < 0
+        exponents[1] = true
+        full_exp[1] = Int32(1)
+    end
+
+    early_exit_j = fb_size + 1  # sentinel: no early exit
+    @inbounds for j in 1:fb_size
+        p = fb[j]
+        p_ui = UInt(p)
+        s1 = starts1[j]
+        if s1 == 0
+            # Prime with no stored sieve position — brute force check
+            r = _tdiv_q_ui_into!(quotient_buf, remainder, p_ui)
+            if r == 0
+                cnt = Int32(1)
+                remainder, quotient_buf = quotient_buf, remainder
+                while true
+                    r = _tdiv_q_ui_into!(quotient_buf, remainder, p_ui)
+                    r != 0 && break
+                    cnt += Int32(1)
+                    remainder, quotient_buf = quotient_buf, remainder
+                end
+                full_exp[j + 1] = cnt
+                exponents[j + 1] = isodd(cnt)
+                if isone(remainder)
+                    break
+                elseif remainder < p * p
+                    early_exit_j = j
+                    break
+                end
+            end
+            continue
+        end
+        hit = (rem(sieve_pos - s1, p) == 0)
+        if !hit
+            s2 = starts2[j]
+            hit = (s2 != s1) && (rem(sieve_pos - s2, p) == 0)
+        end
+        hit || continue
+
+        # p divides g(x) — extract all powers
+        cnt = Int32(0)
+        while true
+            r = _tdiv_q_ui_into!(quotient_buf, remainder, p_ui)
+            r != 0 && break
+            cnt += Int32(1)
+            remainder, quotient_buf = quotient_buf, remainder
+        end
+        full_exp[j + 1] = cnt
+        exponents[j + 1] = isodd(cnt)
+        if isone(remainder)
+            break
+        elseif remainder < p * p
+            early_exit_j = j
+            break
+        end
+    end
+
+    # Handle early exit: remainder has at most one prime factor
+    if early_exit_j <= fb_size && !isone(remainder)
+        rem_int = Int(remainder)
+        idx = searchsortedfirst(fb, rem_int)
+        if idx <= fb_size && fb[idx] == rem_int
+            full_exp[idx + 1] += Int32(1)
+            exponents[idx + 1] ⊻= true
+            Base.GMP.MPZ.set_si!(remainder, 1)
+        end
+    end
+
+    for ai in a_indices
+        exponents[ai + 1] ⊻= true
+    end
+
+    if isone(remainder)
+        axb_copy = BigInt(); Base.GMP.MPZ.set!(axb_copy, ax_b)
+        qx_copy = BigInt(); Base.GMP.MPZ.set!(qx_copy, Qx)
+        return SmoothRelation(axb_copy, qx_copy, copy(exponents), copy(full_exp), a_indices, 0, 0)
+    elseif remainder <= large_prime_bound && remainder > 1
+        lp = Int(remainder)
+        if isprime(lp)
+            axb_copy = BigInt(); Base.GMP.MPZ.set!(axb_copy, ax_b)
+            qx_copy = BigInt(); Base.GMP.MPZ.set!(qx_copy, Qx)
+            return SmoothRelation(axb_copy, qx_copy, copy(exponents), copy(full_exp), a_indices, lp, 0)
+        end
+    elseif remainder <= dlp_bound_sq && remainder > 1 && !isprime(Int(remainder))
+        # Double large prime: try to split composite remainder into two primes
+        f = pollardfactor(remainder)
+        if f !== nothing
+            p1 = Int(min(f, div(remainder, f)))
+            p2 = Int(max(f, div(remainder, f)))
+            if p1 > 1 && p2 > 1 && p1 <= dlp_bound && p2 <= dlp_bound && isprime(p1) && isprime(p2)
+                axb_copy = BigInt(); Base.GMP.MPZ.set!(axb_copy, ax_b)
+                qx_copy = BigInt(); Base.GMP.MPZ.set!(qx_copy, Qx)
+                return SmoothRelation(axb_copy, qx_copy, copy(exponents), copy(full_exp), a_indices, p1, p2)
+            end
+        end
+    end
+
+    return nothing
+end
+
+"""
+Combine two partial relations sharing a large prime `shared_lp`.
+The shared LP cancels (appears with even exponent in the product).
+Returns a relation with remaining LPs (0, 1, or 2).
+"""
+function _combine_partials(r1::SmoothRelation, r2::SmoothRelation,
+                           shared_lp::Int, ctx::MPQSContext)::Union{SmoothRelation, Nothing}
+    combined_exp = r1.exponents .⊻ r2.exponents
+    combined_full = r1.full_exp .+ r2.full_exp
+    combined_a_indices = vcat(r1.a_indices, r2.a_indices)
+    combined_axb = mod(r1.ax_plus_b * r2.ax_plus_b, ctx.n_orig)
+    combined_Q = r1.Q_val * r2.Q_val
+
+    # Collect remaining LPs (those that aren't the shared one)
+    remaining = Int[]
+    for lp in (r1.large_prime, r1.large_prime2, r2.large_prime, r2.large_prime2)
+        lp == 0 && continue
+        lp == shared_lp && continue
+        push!(remaining, lp)
+    end
+
+    # The shared LP appears in both relations, so its product is lp^2 (even exponent).
+    # We track it for square root computation.
+    if isempty(remaining)
+        return SmoothRelation(combined_axb, combined_Q, combined_exp, combined_full,
+                              combined_a_indices, shared_lp, 0)
+    elseif length(remaining) == 1
+        return SmoothRelation(combined_axb, combined_Q, combined_exp, combined_full,
+                              combined_a_indices, shared_lp, remaining[1])
+    end
+    # Two or more remaining LPs — can't use directly as a full relation
+    return nothing
+end
+
+"""
+GF(2) Gaussian elimination on parity vectors (fallback for small matrices).
+Returns dependency sets (indices that XOR to zero).
+"""
+function _gf2_eliminate_gaussian(relations::Vector{BitVector}, fb_size::Int)::Vector{Vector{Int}}
+    nrels = length(relations)
+    nrels == 0 && return Vector{Int}[]
+
+    matrix = [copy(r) for r in relations]
+    history = [falses(nrels) for _ in 1:nrels]
+    for i in 1:nrels
+        history[i][i] = true
+    end
+    ncols = length(relations[1])
+    pivot_col = zeros(Int, ncols)
+
+    for i in 1:nrels
+        pivot = findfirst(matrix[i])
+        while pivot !== nothing
+            if pivot_col[pivot] == 0
+                pivot_col[pivot] = i
+                break
+            else
+                pr = pivot_col[pivot]
+                matrix[i] .⊻= matrix[pr]
+                history[i] .⊻= history[pr]
+                pivot = findfirst(matrix[i])
+            end
+        end
+    end
+
+    dependencies = Vector{Int}[]
+    for i in 1:nrels
+        if !any(matrix[i])
+            push!(dependencies, findall(history[i]))
+        end
+    end
+    return dependencies
+end
+
+"""
+    _gf2_eliminate(relations, fb_size) -> Vector{Vector{Int}}
+
+Find GF(2) dependencies using Gaussian elimination.
+For the matrix sizes encountered in MPQS (up to ~10K), Gaussian elimination
+is faster than Block Lanczos due to lower overhead.
+"""
+function _gf2_eliminate(relations::Vector{BitVector}, fb_size::Int)::Vector{Vector{Int}}
+    return _gf2_eliminate_gaussian(relations, fb_size)
+end
+
+"""
+Extract a factor from a dependency set using stored exponent vectors.
+full_exp stores exponents of g(x) over the factor base (sign in index 1).
+a_indices stores which FB primes compose the polynomial's `a` value.
+Q(x) = a · g(x), so exp_Q(p) = exp_g(p) + exp_a(p).
+"""
+function _extract_factor(n_orig::BigInt, kn::BigInt, k::Int,
+                         dependency::Vector{Int},
+                         relations::Vector{SmoothRelation},
+                         factor_base::Vector{Int})::Union{BigInt, Nothing}
+    x = BigInt(1)
+    fb_size = length(factor_base)
+    total_exp = zeros(Int, fb_size)
+
+    for idx in dependency
+        r = relations[idx]
+        x = mod(x * r.ax_plus_b, n_orig)
+
+        # Add g(x) exponents (indices 2:end of full_exp, index 1 is sign)
+        for j in 1:fb_size
+            total_exp[j] += Int(r.full_exp[j + 1])
+        end
+        # Add a exponents: each prime in a_indices contributes exponent 1
+        for ai in r.a_indices
+            total_exp[ai] += 1
+        end
+    end
+
+    # Compute y = ∏ p^(e/2) mod n (all exponents should be even by GF(2) dependency)
+    y = BigInt(1)
+    for j in 1:fb_size
+        e = total_exp[j]
+        if e > 0
+            y = mod(y * powermod(BigInt(factor_base[j]), e ÷ 2, n_orig), n_orig)
+        end
+    end
+
+    # Include large prime contributions from combined partial relations.
+    # Each shared LP appears with even exponent (lp^2), contributing one lp to y.
+    for idx in dependency
+        r = relations[idx]
+        if r.large_prime > 0
+            y = mod(y * BigInt(r.large_prime), n_orig)
+        end
+        if r.large_prime2 > 0
+            y = mod(y * BigInt(r.large_prime2), n_orig)
+        end
+    end
+
+    g = gcd(abs(x - y), n_orig)
+    if g > 1 && g < n_orig
+        g2 = gcd(g, BigInt(k))
+        if g2 > 1 && g2 < g
+            g = div(g, g2)
+        end
+        if g > 1 && g < n_orig && mod(n_orig, g) == 0
+            return g
+        end
+    end
+
+    g = gcd(abs(x + y), n_orig)
+    if g > 1 && g < n_orig
+        g2 = gcd(g, BigInt(k))
+        if g2 > 1 && g2 < g
+            g = div(g, g2)
+        end
+        if g > 1 && g < n_orig && mod(n_orig, g) == 0
+            return g
+        end
+    end
+
+    return nothing
+end
+
+"""
+Generate SIQS `a` value and CRT components for multiple b-values.
+Returns (a, a_indices, B_components) where B_components[j] is the CRT contribution
+from the j-th prime factor of a. This allows generating 2^(s-1) b-values.
+"""
+function _generate_siqs_a(ctx::MPQSContext, used_a_sets::Set{Vector{Int}})
+    fb = ctx.factor_base
+    fb_size = ctx.fb_size
+    kn = ctx.n
+    target_a = isqrt(2 * kn) ÷ ctx.sieve_interval
+
+    lo = max(5, fb_size ÷ 3)
+    hi = fb_size
+    typical_log_p = log(Float64(fb[div(lo + hi, 2)]))
+    num_facs = max(2, round(Int, log(Float64(target_a)) / typical_log_p))
+    num_facs_range = max(2, num_facs - 1):min(fb_size ÷ 2, num_facs + 1)
+
+    best_a = BigInt(0)
+    best_indices = Int[]
+    best_diff = BigInt(10)^200
+
+    for _ in 1:100
+        nf = rand(num_facs_range)
+        indices = sort(rand(lo:hi, nf))
+        length(unique(indices)) == nf || continue
+        indices = unique(indices)
+        sort!(indices)
+        length(indices) == nf || continue
+        indices in used_a_sets && continue
+
+        a = prod(BigInt(fb[i]) for i in indices)
+        diff = abs(a - target_a)
+        if diff < best_diff
+            best_diff = diff
+            best_a = a
+            best_indices = indices
+        end
+    end
+
+    isempty(best_indices) && return nothing
+    push!(used_a_sets, best_indices)
+
+    a = best_a
+    s = length(best_indices)
+
+    # Compute CRT components B_j via Hensel lifting:
+    # For each prime q_j | a, B_j = sqrt(kn) mod q_j * (a/q_j) * inv(a/q_j, q_j) mod a
+    B_components = Vector{BigInt}(undef, s)
+    for (j, idx) in enumerate(best_indices)
+        q = BigInt(fb[idx])
+        r = ctx.sqrt_kn_mod[idx]
+        a_div_q = a ÷ q
+        inv_a_q = invmod(mod(a_div_q, q), q)
+        B_components[j] = mod(r * a_div_q * inv_a_q, a)
+    end
+
+    return (a, best_indices, B_components)
+end
+
+"""
+Precompute inv(a) mod p for each factor base prime (done once per a value).
+"""
+function _precompute_inv_a(a::BigInt, factor_base::Vector{Int})::Vector{Int}
+    fb_size = length(factor_base)
+    inv_a = Vector{Int}(undef, fb_size)
+    @inbounds for j in 1:fb_size
+        p = factor_base[j]
+        a_mod_p = Int(_mpz_fdiv_ui(a, UInt(p)))
+        if a_mod_p == 0
+            inv_a[j] = 0  # p divides a
+        else
+            inv_a[j] = invmod(a_mod_p, p)
+        end
+    end
+    return inv_a
+end
+
+"""
+Compute initial sieve root offsets from b (requires BigInt mod, done once per a-value).
+offset1[j], offset2[j] ∈ [0, p-1]: sieve positions are offset+1, offset+1+p, ...
+Use -1 as sentinel for "no root" (p | a and 2b ≡ 0 mod p).
+"""
+function _compute_siqs_roots!(offset1::Vector{Int}, offset2::Vector{Int},
+                               b::BigInt, a::BigInt, kn::BigInt,
+                               inv_a::Vector{Int},
+                               sqrt_kn_mod::Vector{Int},
+                               factor_base::Vector{Int},
+                               fb_size::Int, M::Int, a_indices::Vector{Int})
+    c = div(b * b - kn, a)
+    @inbounds for j in 1:fb_size
+        p = factor_base[j]
+        if inv_a[j] == 0
+            b2_mod_p = mod(2 * Int(_mpz_fdiv_ui(b, UInt(p))), p)
+            c_mod_p = Int(_mpz_fdiv_ui(c, UInt(p)))
+            if b2_mod_p == 0
+                offset1[j] = -1; offset2[j] = -1
+            else
+                b_inv = invmod(b2_mod_p, p)
+                r = mod(-c_mod_p * b_inv, p)
+                o = mod(r + M, p)
+                offset1[j] = o; offset2[j] = o
+            end
+        else
+            sqr = sqrt_kn_mod[j]
+            b_mod_p = Int(_mpz_fdiv_ui(b, UInt(p)))
+            ai = inv_a[j]
+            r1 = mod((sqr - b_mod_p) * ai, p)
+            r2 = mod((-sqr - b_mod_p) * ai, p)
+            offset1[j] = mod(r1 + M, p)
+            offset2[j] = mod(r2 + M, p)
+        end
+    end
+end
+
+"""
+Fast SIQS sieve: constant initialization (fill!) + unclamped subtraction + small prime skipping.
+"""
+function _siqs_sieve!(sieve::Vector{UInt8}, sieve_len::Int,
+                      offset1::Vector{Int}, offset2::Vector{Int},
+                      starts1::Vector{Int}, starts2::Vector{Int},
+                      factor_base::Vector{Int}, log_primes::Vector{UInt8},
+                      fb_size::Int, sieve_start_idx::Int, log_init::UInt8)
+    fill!(sieve, log_init)
+
+    @inbounds for j in 1:fb_size
+        if offset1[j] < 0
+            starts1[j] = 0; starts2[j] = 0
+        else
+            starts1[j] = offset1[j] + 1
+            starts2[j] = offset2[j] + 1
+        end
+    end
+
+    @inbounds for j in sieve_start_idx:fb_size
+        p = factor_base[j]
+        logp = log_primes[j]
+        s1 = starts1[j]
+        s1 <= 0 && continue
+
+        s2 = starts2[j]
+        if s2 != s1 && s2 > 0
+            # Two distinct roots: interleave writes for memory-level parallelism
+            pos1 = s1
+            pos2 = s2
+            while pos1 <= sieve_len && pos2 <= sieve_len
+                sieve[pos1] -= logp
+                sieve[pos2] -= logp
+                pos1 += p
+                pos2 += p
+            end
+            while pos1 <= sieve_len
+                sieve[pos1] -= logp
+                pos1 += p
+            end
+            while pos2 <= sieve_len
+                sieve[pos2] -= logp
+                pos2 += p
+            end
+        else
+            # Single root (p | a)
+            pos = s1
+            while pos <= sieve_len
+                sieve[pos] -= logp
+                pos += p
+            end
+        end
+    end
+end
+
+"""
+Collect smooth candidates from sieve. Candidates are positions where the sieve
+value underflowed (>= 0x80), indicating sufficient factorization over the factor base.
+"""
+function _siqs_collect!(poly::MPQSPolynomial, ctx::MPQSContext,
+                        sieve::Vector{UInt8}, starts1::Vector{Int}, starts2::Vector{Int},
+                        relations::Vector{SmoothRelation},
+                        partial_relations::Dict{Int, SmoothRelation},
+                        M::Int, large_prime_bound::Int, dlp_bound::Int, dlp_bound_sq::Int,
+                        tf_exponents::BitVector, tf_full_exp::Vector{Int32},
+                        tf_remainder::BigInt, tf_quotient::BigInt,
+                        tf_ax_b::BigInt, tf_Qx::BigInt, tf_gx::BigInt)
+    a, b = poly.a, poly.b
+    kn = ctx.n
+    sieve_len = length(sieve)
+
+    # Vectorized sieve scanning: process 8 bytes at a time via reinterpret
+    num_chunks = div(sieve_len, 8)
+    body_len = num_chunks * 8
+
+    if num_chunks > 0
+        sieve_body = @view sieve[1:body_len]
+        sieve_chunks = reinterpret(UInt64, sieve_body)
+
+        @inbounds for j in 1:num_chunks
+            chunk = sieve_chunks[j]
+            # The bitmask trick: check if any of the 8 bytes have the high bit set
+            if chunk & 0x8080808080808080 != 0
+                for k in 1:8
+                    idx = (j - 1) * 8 + k
+                    if sieve[idx] >= 0x80
+                        _process_candidate!(idx, a, b, kn, M, ctx,
+                                            large_prime_bound, dlp_bound, dlp_bound_sq,
+                                            starts1, starts2, poly.a_factors,
+                                            tf_exponents, tf_full_exp, tf_remainder, tf_quotient,
+                                            tf_ax_b, tf_Qx, tf_gx,
+                                            relations, partial_relations)
+                    end
+                end
+            end
+        end
+    end
+
+    # Handle the tail (remaining 1 to 7 bytes that didn't fit in a UInt64)
+    @inbounds for i in (body_len + 1):sieve_len
+        if sieve[i] >= 0x80
+            _process_candidate!(i, a, b, kn, M, ctx,
+                                large_prime_bound, dlp_bound, dlp_bound_sq,
+                                starts1, starts2, poly.a_factors,
+                                tf_exponents, tf_full_exp, tf_remainder, tf_quotient,
+                                tf_ax_b, tf_Qx, tf_gx,
+                                relations, partial_relations)
+        end
+    end
+end
+
+"""
+Process a single sieve candidate at position `i`.
+"""
+@inline function _process_candidate!(i::Int, a::BigInt, b::BigInt, kn::BigInt, M::Int,
+                             ctx::MPQSContext,
+                             large_prime_bound::Int, dlp_bound::Int, dlp_bound_sq::Int,
+                             starts1::Vector{Int}, starts2::Vector{Int},
+                             a_factors::Vector{Int},
+                             tf_exponents::BitVector, tf_full_exp::Vector{Int32},
+                             tf_remainder::BigInt, tf_quotient::BigInt,
+                             tf_ax_b::BigInt, tf_Qx::BigInt, tf_gx::BigInt,
+                             relations::Vector{SmoothRelation},
+                             partial_relations::Dict{Int, SmoothRelation})
+    x = i - M - 1
+    Base.GMP.MPZ.mul_si!(tf_ax_b, a, x)
+    Base.GMP.MPZ.add!(tf_ax_b, b)
+    Base.GMP.MPZ.mul!(tf_Qx, tf_ax_b, tf_ax_b)
+    Base.GMP.MPZ.sub!(tf_Qx, kn)
+    iszero(tf_Qx) && return
+    Base.GMP.MPZ.tdiv_q!(tf_gx, tf_Qx, a)
+
+    relation = _trial_factor_guided(tf_ax_b, tf_gx, tf_Qx, ctx,
+                                    large_prime_bound, dlp_bound, dlp_bound_sq,
+                                    i, starts1, starts2,
+                                    a_factors,
+                                    tf_exponents, tf_full_exp, tf_remainder, tf_quotient)
+    relation === nothing && return
+
+    if relation.large_prime == 0 && relation.large_prime2 == 0
+        # Fully smooth
+        push!(relations, relation)
+    elseif relation.large_prime2 == 0
+        # Single large prime partial
+        lp = relation.large_prime
+        if haskey(partial_relations, lp)
+            other = partial_relations[lp]
+            combined = _combine_partials(relation, other, lp, ctx)
+            if combined !== nothing
+                if combined.large_prime2 == 0
+                    push!(relations, combined)
+                end
+                # If combined still has 2 LPs, discard (too complex to chain)
+            end
+            delete!(partial_relations, lp)
+        else
+            partial_relations[lp] = relation
+        end
+    else
+        # Double large prime partial — try to combine with existing single partials
+        lp1, lp2 = relation.large_prime, relation.large_prime2
+        if haskey(partial_relations, lp1)
+            other = partial_relations[lp1]
+            combined = _combine_partials(relation, other, lp1, ctx)
+            delete!(partial_relations, lp1)
+            if combined !== nothing
+                if combined.large_prime2 == 0
+                    # Fully resolved — push as full relation
+                    push!(relations, combined)
+                else
+                    # One remaining LP — store as single-LP partial
+                    remaining_lp = combined.large_prime2
+                    if haskey(partial_relations, remaining_lp)
+                        other2 = partial_relations[remaining_lp]
+                        combined2 = _combine_partials(combined, other2, remaining_lp, ctx)
+                        if combined2 !== nothing && combined2.large_prime2 == 0
+                            push!(relations, combined2)
+                        end
+                        delete!(partial_relations, remaining_lp)
+                    else
+                        partial_relations[remaining_lp] = combined
+                    end
+                end
+            end
+        elseif haskey(partial_relations, lp2)
+            other = partial_relations[lp2]
+            combined = _combine_partials(relation, other, lp2, ctx)
+            delete!(partial_relations, lp2)
+            if combined !== nothing
+                if combined.large_prime2 == 0
+                    push!(relations, combined)
+                else
+                    remaining_lp = combined.large_prime2
+                    if haskey(partial_relations, remaining_lp)
+                        other2 = partial_relations[remaining_lp]
+                        combined2 = _combine_partials(combined, other2, remaining_lp, ctx)
+                        if combined2 !== nothing && combined2.large_prime2 == 0
+                            push!(relations, combined2)
+                        end
+                        delete!(partial_relations, remaining_lp)
+                    else
+                        partial_relations[remaining_lp] = combined
+                    end
+                end
+            end
+        else
+            # Store indexed by smaller LP for future matching
+            partial_relations[lp1] = relation
+        end
+    end
+end
+
+"""
+    mpqs_factor(n::BigInt) -> BigInt
+
+Factor `n` using the Self-Initializing Quadratic Sieve (SIQS variant of MPQS).
+Uses constant sieve initialization, unclamped subtraction, small prime skipping,
+and incremental Gray code root updates for performance.
+Returns a non-trivial factor of `n`.
+"""
+function mpqs_factor(n::BigInt)::BigInt
+    k = _select_knuth_multiplier(n)
+    kn = BigInt(k) * n
+
+    fb_size_target, sieve_interval = _mpqs_select_params(n)
+    factor_base, sqrt_kn_mod, log_primes = _build_factor_base(kn, fb_size_target)
+    actual_fb_size = length(factor_base)
+
+    ctx = MPQSContext(kn, n, k, factor_base, actual_fb_size,
+                      sqrt_kn_mod, log_primes, sieve_interval)
+
+    relations = SmoothRelation[]
+    partial_relations = Dict{Int, SmoothRelation}()
+    used_a_sets = Set{Vector{Int}}()
+    target_relations = actual_fb_size + 50
+
+    M = sieve_interval
+    sieve_len = 2 * M + 1
+
+    # Preallocate buffers (reused across all polynomials)
+    sieve = Vector{UInt8}(undef, sieve_len)
+    starts1 = Vector{Int}(undef, actual_fb_size)
+    starts2 = Vector{Int}(undef, actual_fb_size)
+    offset1 = Vector{Int}(undef, actual_fb_size)
+    offset2 = Vector{Int}(undef, actual_fb_size)
+
+    # Preallocate trial factoring buffers (reused across all candidates)
+    tf_exponents = falses(actual_fb_size + 1)
+    tf_full_exp = zeros(Int32, actual_fb_size + 1)
+    tf_remainder = BigInt(0)
+    tf_quotient = BigInt(0)
+    tf_ax_b = BigInt(0)
+    tf_Qx = BigInt(0)
+    tf_gx = BigInt(0)
+
+    # Constant sieve init: candidates are detected by UInt8 underflow (>= 0x80).
+    # log_init calibrated to match reference implementation threshold.
+    # nbits ≈ log2(sqrt(2kn)), up_to = 1.5 tolerance factor
+    nbits = ndigits(isqrt(2 * kn), base=2)
+    logp_max = floor(Int, log2(Float64(factor_base[end])))
+    up_to = 1.5
+    log_init = UInt8(clamp(nbits - round(Int, up_to * logp_max), 1, 127))
+
+    # Skip small primes in sieve (they hit too many positions relative to their
+    # log contribution). They are still checked during trial factoring.
+    # Ref: skip primes < 2^4 when nbits < 90, < 2^5 when 90-120, < 2^6 when > 120
+    skip_limit = nbits > 120 ? 512 : (nbits > 90 ? 256 : (nbits > 78 ? 128 : 32))
+    sieve_start_idx = 1
+    while sieve_start_idx <= actual_fb_size && factor_base[sieve_start_idx] < skip_limit
+        sieve_start_idx += 1
+    end
+
+    # Large prime bounds
+    p_max = factor_base[end]
+    large_prime_bound = (2 + (p_max >> 16)) * p_max * floor(Int, log2(Float64(p_max)))
+    # Double large prime: accept remainder = p1 * p2 where both < dlp_bound
+    dlp_bound = p_max * 100
+    dlp_bound_sq = dlp_bound * dlp_bound
+
+    max_a_values = 5000
+    done = false
+
+    # Preallocate B_delta arrays (reused across a-values)
+    max_s = 10
+    B_delta = [Vector{Int}(undef, actual_fb_size) for _ in 1:max_s]
+    inv_a = Vector{Int}(undef, actual_fb_size)
+
+    for _ in 1:max_a_values
+        done && break
+
+        result = _generate_siqs_a(ctx, used_a_sets)
+        result === nothing && continue
+        a, a_indices, B_comps = result
+        s = length(a_indices)
+
+        # Precompute inv(a) mod p using factored form (avoids GMP BigInt mod)
+        # a = prod(factor_base[idx] for idx in a_indices)
+        @inbounds for j in 1:actual_fb_size
+            p = factor_base[j]
+            a_mod_p = 1
+            for idx in a_indices
+                if factor_base[idx] == p
+                    a_mod_p = 0
+                    break
+                end
+                a_mod_p = mod(a_mod_p * mod(factor_base[idx], p), p)
+            end
+            if a_mod_p == 0
+                inv_a[j] = 0
+            else
+                inv_a[j] = invmod(a_mod_p, p)
+            end
+        end
+
+        # Precompute root deltas for incremental Gray code b-switching
+        # B_delta[v][j] = 2 * B_v mod p * inv_a[j] mod p
+        for v in 1:s
+            Bv = B_comps[v]
+            delta = B_delta[v]
+            @inbounds for j in 1:actual_fb_size
+                p = factor_base[j]
+                Bv_mod_p = Int(_mpz_fdiv_ui(Bv, UInt(p)))
+                delta[j] = mod(2 * Bv_mod_p * inv_a[j], p)
+            end
+        end
+
+        # Initial b (all positive CRT signs)
+        b = mod(sum(B_comps), a)
+        if mod(b * b, a) != mod(kn, a)
+            b = a - b
+        end
+
+        # Compute initial roots (BigInt mod, once per a)
+        _compute_siqs_roots!(offset1, offset2, b, a, kn, inv_a,
+                              sqrt_kn_mod, factor_base, actual_fb_size, M, a_indices)
+
+        # First polynomial
+        _siqs_sieve!(sieve, sieve_len, offset1, offset2, starts1, starts2,
+                     factor_base, log_primes, actual_fb_size, sieve_start_idx, log_init)
+        poly = MPQSPolynomial(a, b, a_indices)
+        _siqs_collect!(poly, ctx, sieve, starts1, starts2,
+                       relations, partial_relations, M, large_prime_bound,
+                       dlp_bound, dlp_bound_sq,
+                       tf_exponents, tf_full_exp, tf_remainder, tf_quotient,
+                       tf_ax_b, tf_Qx, tf_gx)
+
+        if length(relations) >= target_relations
+            break
+        end
+
+        # Remaining b-values via Gray code incremental root update
+        num_b = 1 << (s - 1)
+        for i in 2:num_b
+            gray_prev = (i - 2) ⊻ ((i - 2) >> 1)
+            gray_curr = (i - 1) ⊻ ((i - 1) >> 1)
+            flip_bit = trailing_zeros(gray_prev ⊻ gray_curr) + 1
+
+            if (gray_curr >> (flip_bit - 1)) & 1 == 1
+                b = mod(b - 2 * B_comps[flip_bit], a)
+                dd = +1  # roots shift right
+            else
+                b = mod(b + 2 * B_comps[flip_bit], a)
+                dd = -1  # roots shift left
+            end
+
+            # Incremental root update (pure Int arithmetic, no BigInt mod)
+            delta = B_delta[flip_bit]
+            @inbounds for j in 1:actual_fb_size
+                inv_a[j] == 0 && continue
+                p = factor_base[j]
+                d = delta[j]
+                if dd > 0
+                    o1 = offset1[j] + d; if o1 >= p; o1 -= p; end
+                    offset1[j] = o1
+                    o2 = offset2[j] + d; if o2 >= p; o2 -= p; end
+                    offset2[j] = o2
+                else
+                    o1 = offset1[j] - d; if o1 < 0; o1 += p; end
+                    offset1[j] = o1
+                    o2 = offset2[j] - d; if o2 < 0; o2 += p; end
+                    offset2[j] = o2
+                end
+            end
+
+            # Recompute roots for primes dividing a (~s primes, needs BigInt)
+            c = div(b * b - kn, a)
+            for idx in a_indices
+                p = factor_base[idx]
+                b2_mod_p = mod(2 * Int(_mpz_fdiv_ui(b, UInt(p))), p)
+                c_mod_p = Int(_mpz_fdiv_ui(c, UInt(p)))
+                if b2_mod_p == 0
+                    offset1[idx] = -1; offset2[idx] = -1
+                else
+                    b_inv = invmod(b2_mod_p, p)
+                    r = mod(-c_mod_p * b_inv, p)
+                    o = mod(r + M, p)
+                    offset1[idx] = o; offset2[idx] = o
+                end
+            end
+
+            # Sieve and collect
+            _siqs_sieve!(sieve, sieve_len, offset1, offset2, starts1, starts2,
+                         factor_base, log_primes, actual_fb_size, sieve_start_idx, log_init)
+            poly = MPQSPolynomial(a, b, a_indices)
+            _siqs_collect!(poly, ctx, sieve, starts1, starts2,
+                           relations, partial_relations, M, large_prime_bound,
+                           dlp_bound, dlp_bound_sq,
+                           tf_exponents, tf_full_exp, tf_remainder, tf_quotient,
+                           tf_ax_b, tf_Qx, tf_gx)
+
+            if length(relations) >= target_relations
+                done = true
+                break
+            end
+        end
+    end
+
+    if length(relations) < actual_fb_size + 1
+        error("failed to factor $n: insufficient smooth relations ($(length(relations)) found, need $(actual_fb_size + 1))")
+    end
+
+    # GF(2) elimination
+    parity_vectors = [r.exponents for r in relations]
+    dependencies = _gf2_eliminate(parity_vectors, actual_fb_size)
+
+    # Try each dependency to find a non-trivial factor
+    for dep in dependencies
+        result = _extract_factor(n, kn, k, dep, relations, factor_base)
+        if result !== nothing
+            return result
+        end
+    end
+
+    error("failed to factor $n")
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index f63e59a..34322a4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -511,3 +511,164 @@ end
     end
     @test primes(2^31-20, 2^31-1) == [2147483629, 2147483647]
 end
+
+# --- Tests for large integer factorization (002-large-int-factorization) ---
+
+@testset "Polyalgorithm dispatch" begin
+    # Perfect power path
+    p = nextprime(big"100000007")
+    n2 = p^2
+    f2 = Primes._find_factor(n2)
+    @test f2 == p
+end
+
+@testset "ECM factorization" begin
+    # Semi-prime with a ~40-bit factor
+    p1 = big"824633720831"   # 40-bit prime
+    q1 = big"1000000007"     # 30-bit prime
+    n1 = p1 * q1
+    f1 = factor(n1)
+    @test prodfactors(f1) == n1
+    @test all(isprime(p) for (p, _) in f1)
+
+    # p ~40 bits, q ~80 bits
+    p2 = big"824633720831"
+    q2 = big"1180591620717411303449"  # ~80-bit prime
+    n2 = p2 * q2
+    f2 = factor(n2)
+    @test prodfactors(f2) == n2
+    @test all(isprime(p) for (p, _) in f2)
+end
+
+@testset "MPQS factorization" begin
+    # Semi-prime with two ~60-bit factors
+    p1 = big"780002082420426809"
+    q1 = big"810735269523504809437013569"
+    n1 = p1 * q1
+    f1 = factor(n1)
+    @test prodfactors(f1) == n1
+    @test all(isprime(p) for (p, _) in f1)
+end
+
+@testset "BigInt factorization" begin
+    # factor(BigInt(n)) returns Factorization{BigInt}
+    n1 = big"2152302898747"
+    f1 = factor(n1)
+    @test f1 isa Primes.Factorization{BigInt}
+    @test prodfactors(f1) == n1
+
+    # BigInt prime returns single-entry
+    p = big"359334085968622831041960188598043661065388726959079837"
+    f2 = factor(p)
+    @test length(f2) == 1
+    @test first(f2) == (p => 1)
+end
+
+@testset "Tonelli-Shanks" begin
+    # p ≡ 3 (mod 4) — fast path (s == 1)
+    @test Primes._tonelli_shanks(big"2", 7)^2 % 7 == 2
+    # p ≡ 1 (mod 4) — general path
+    @test Primes._tonelli_shanks(big"2", 17)^2 % 17 == 2
+    # n == 0 case
+    @test Primes._tonelli_shanks(big"0", 7) == 0
+    # p == 2 case
+    @test Primes._tonelli_shanks(big"1", 2) == 1
+    # Larger prime requiring full Tonelli-Shanks loop (p ≡ 1 mod 8, s > 1)
+    @test Primes._tonelli_shanks(big"2", 41)^2 % 41 == 2
+end
+
+@testset "MPQS parameter selection" begin
+    # Small number (below minimum)
+    fb, si = Primes._mpqs_select_params(big"10"^20)
+    @test fb > 0
+    @test si > 0
+    # Large number (above maximum)
+    fb2, si2 = Primes._mpqs_select_params(big"10"^80)
+    @test fb2 == 16000
+    @test si2 == 250000
+    # Mid-range (interpolation)
+    fb3, si3 = Primes._mpqs_select_params(big"10"^50)
+    @test fb3 > 0
+    @test si3 > 0
+end
+
+@testset "Knuth multiplier selection" begin
+    n = big"632459103267572196107100983820469021721602147490918660274601"
+    k = Primes._select_knuth_multiplier(n)
+    @test k >= 1
+    @test isodd(k)
+    @test k <= 47
+end
+
+@testset "Factor base construction" begin
+    kn = big"3" * big"632459103267572196107100983820469021721602147490918660274601"
+    fb, sqr, logp = Primes._build_factor_base(kn, 20)
+    @test fb[1] == 2
+    @test length(fb) == 20
+    @test length(sqr) == 20
+    @test length(logp) == 20
+    # Verify sqrt_kn_mod values: r^2 ≡ kn (mod p) for odd primes
+    for i in 2:length(fb)
+        p = fb[i]
+        r = sqr[i]
+        @test mod(r * r, p) == mod(kn, p)
+    end
+end
+
+@testset "GF(2) Gaussian elimination" begin
+    # Empty input
+    deps = Primes._gf2_eliminate_gaussian(BitVector[], 3)
+    @test isempty(deps)
+    # Two identical vectors should produce a dependency
+    v = BitVector([true, false, true])
+    deps2 = Primes._gf2_eliminate_gaussian([v, copy(v)], 3)
+    @test length(deps2) >= 1
+    @test sort(deps2[1]) == [1, 2]
+    # Three vectors with one dependency: v1 ⊻ v2 = v3
+    v1 = BitVector([true, false, true, false])
+    v2 = BitVector([false, true, true, false])
+    v3 = BitVector([true, true, false, false])  # v1 ⊻ v2
+    deps3 = Primes._gf2_eliminate_gaussian([v1, v2, v3], 4)
+    @test length(deps3) >= 1
+end
+
+@testset "ECM edge cases" begin
+    # ECM with a small semi-prime
+    n = big"1000000007" * big"1000000009"
+    result = Primes.ecm_factor(n, 2000, 50)
+    @test result !== nothing
+    @test mod(n, result) == 0
+
+    # ECM returns nothing when curves are insufficient for a hard number
+    hard = nextprime(big"10"^30) * nextprime(big"10"^30 + 1000)
+    result2 = Primes.ecm_factor(hard, 100, 1)
+    # May or may not find a factor with just 1 curve — no assertion on result
+end
+
+@testset "Polyalgorithm ECM schedule branches" begin
+    # Small number — exercises the < 128 bits branch
+    p1 = nextprime(big"10"^10)
+    q1 = nextprime(big"10"^10 + 1000)
+    n1 = p1 * q1
+    f1 = Primes._find_factor(n1)
+    @test mod(n1, f1) == 0 && f1 > 1 && f1 < n1
+
+    # Medium number (~40 digits, >= 128 bits) — exercises the 128-192 bits branch
+    p2 = nextprime(big"10"^19)
+    q2 = nextprime(big"10"^19 + 1000)
+    n2 = p2 * q2
+    f2 = Primes._find_factor(n2)
+    @test mod(n2, f2) == 0 && f2 > 1 && f2 < n2
+
+    # Perfect power path
+    @test Primes._find_factor(big"2"^64) == 2
+    @test Primes._find_factor(big"7"^3) == 7
+end
+
+@testset "Large semi-prime factorization (#159)" begin
+    n = big"632459103267572196107100983820469021721602147490918660274601"
+    f = factor(n)
+    @test prodfactors(f) == n
+    @test all(isprime(p) for (p, _) in f)
+    @test length(f) == 2  # it's a semi-prime
+end