diff --git a/.gitignore b/.gitignore index 9847c84..63d7e8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,54 @@ +* + +### Allowed files and directories ### + +!.gitignore +!.github/ +!.github/workflows/ +!.github/workflows/* +!.github/dependabot.yml + +!LICENSE.md +!README.md +!Project.toml + +!src/ +!src/*.jl +!test/ +!test/*.jl +!docs/ +!docs/src/ +!docs/src/*.md +!docs/make.jl +!docs/Project.toml + +### Denied even if allowed above ### + +# Files generated by invoking Julia with --code-coverage *.jl.cov *.jl.*.cov + +# Files generated by invoking Julia with --track-allocation *.jl.mem -Manifest.toml -docs/build -docs/site -docs/Manifest.toml + +# System-specific files and directories generated by the BinaryProvider and BinDeps packages +# They contain absolute paths specific to the host computer, and so should not be committed +deps/deps.jl +deps/build.log +deps/downloads/ +deps/usr/ +deps/src/ + +# Build artifacts for creating documentation generated by the Documenter package +docs/build/ +docs/site/ + +# File generated by Pkg, the package manager, based on a corresponding Project.toml +# It records a fixed state of all packages used by the project. As such, it should not be +# committed for packages, but should be committed for applications that require a static +# environment. +Manifest*.toml + +# File generated by the Preferences package to store local preferences +LocalPreferences.toml +JuliaLocalPreferences.toml \ No newline at end of file diff --git a/src/Primes.jl b/src/Primes.jl index b95fdac..772fccd 100644 --- a/src/Primes.jl +++ b/src/Primes.jl @@ -12,6 +12,8 @@ export isprime, primes, primesmask, factor, eachfactor, divisors, ismersenneprim nextprime, nextprimes, prevprime, prevprimes, prime, prodfactors, radical, totient include("factorization.jl") +include("ecm.jl") +include("mpqs.jl") # Primes generating functions # https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes @@ -340,6 +342,53 @@ allocating the storage required for `factor(n)` can introduce significant overhe """ eachfactor(n::Integer) = FactorIterator(n) +# Polyalgorithm dispatch for large integer factorization +# Ref: Cohen (1993) "A Course in Computational Algebraic Number Theory", §1.7 + +""" + _find_factor(n::T) -> T + +Find a non-trivial factor of composite `n` using a polyalgorithm: +1. Perfect power check (via IntegerMathUtils.ispower/iroot) +2. ECM (Elliptic Curve Method) +3. MPQS (Multiple Polynomial Quadratic Sieve) +""" +function _find_factor(n::T)::T where {T<:Integer} + # 1. Perfect power check using IntegerMathUtils (GMP-backed) + if ispower(n) + d = find_exponent(n) + r = iroot(n, d) + return T(r) + end + + # Convert to BigInt for ECM/MPQS + nb = BigInt(n) + bits = ndigits(nb, base=2) + + # 2. Progressive ECM with increasing B1 bounds + # Thresholds converted from base-10: + # 192 bits ≈ 58 digits | 128 bits ≈ 38 digits + ecm_schedule = if bits >= 192 + # For large numbers, brief ECM then fall through to MPQS + [(B1=2000, curves=10), (B1=11000, curves=20)] + elseif bits >= 128 + [(B1=2000, curves=25), (B1=11000, curves=90), (B1=50000, curves=200)] + else + [(B1=2000, curves=25), (B1=11000, curves=90)] + end + + for (B1, curves) in ecm_schedule + result = ecm_factor(nb, B1, curves) + if result !== nothing + return T(result) + end + end + + # 3. MPQS fallback + result = mpqs_factor(nb) + return T(result) +end + # state[1] is the current number to factor (this decreases when factors are found) # state[2] is the prime to start trial division with. function iterate(f::FactorIterator{T}, state=(f.n, T(3))) where T @@ -392,8 +441,13 @@ function iterate(f::FactorIterator{T}, state=(f.n, T(3))) where T if n <= 2^32 || isprime(n) return (n, 1), (T(1), n) end - should_widen = T <: BigInt || widemul(n - 1, n - 1) ≤ typemax(n) - p = should_widen ? pollardfactor(n) : pollardfactor(widen(n)) + # For large cofactors, use polyalgorithm dispatch (ECM → MPQS) + if n > big"100000000000000000000" # > 10^20 + p = _find_factor(n) + else + should_widen = T <: BigInt || widemul(n - 1, n - 1) ≤ typemax(n) + p = should_widen ? pollardfactor(n) : pollardfactor(widen(n)) + end num_p = 0 while true q, r = divrem(n, p) diff --git a/src/ecm.jl b/src/ecm.jl new file mode 100644 index 0000000..7388f6c --- /dev/null +++ b/src/ecm.jl @@ -0,0 +1,233 @@ +# Elliptic Curve Method (ECM) for integer factorization +# Ref: Lenstra (1987) "Factoring integers with elliptic curves" +# Ref: Montgomery (1987) "Speeding the Pollard and Elliptic Curve Methods of Factorization" + +""" +Point on a Montgomery curve in projective coordinates (X:Z). +The point at infinity is represented by Z == 0. +""" +struct MontgomeryCurvePoint + X::BigInt + Z::BigInt +end + +""" +In-place modular reduction: sets r = n mod d (non-negative remainder). +""" +function _mpz_fdiv_r!(r::BigInt, n::BigInt, d::BigInt) + ccall((:__gmpz_fdiv_r, :libgmp), Cvoid, (Ref{BigInt}, Ref{BigInt}, Ref{BigInt}), r, n, d) +end + +""" +Preallocated scratch space for ECM point arithmetic. +Avoids BigInt allocation in the hot Montgomery ladder loop. +t1-t6: scratch for add!/double!; R0/R1/tmp: scratch for scalar_mul! +""" +struct ECMBuffers + t1::BigInt + t2::BigInt + t3::BigInt + t4::BigInt + t5::BigInt + t6::BigInt + R0_X::BigInt + R0_Z::BigInt + R1_X::BigInt + R1_Z::BigInt + tmp_X::BigInt + tmp_Z::BigInt +end + +ECMBuffers() = ECMBuffers(BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), + BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt()) + +""" +In-place: mulmod!(dst, a, b, n, tmp) sets dst = (a * b) mod n using tmp as scratch. +""" +@inline function _mulmod!(dst::BigInt, a::BigInt, b::BigInt, n::BigInt, tmp::BigInt) + Base.GMP.MPZ.mul!(tmp, a, b) + _mpz_fdiv_r!(dst, tmp, n) +end + +""" +Differential addition on Montgomery curve: given P, Q and P-Q, compute P+Q. +Uses projective coordinates and in-place arithmetic to avoid allocations. +""" +function _ecm_add!(res_X::BigInt, res_Z::BigInt, + P_X::BigInt, P_Z::BigInt, Q_X::BigInt, Q_Z::BigInt, + diff_X::BigInt, diff_Z::BigInt, n::BigInt, buf::ECMBuffers) + t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6 + # u = (P.X - P.Z) * (Q.X + Q.Z) mod n + Base.GMP.MPZ.sub!(t1, P_X, P_Z) # t1 = P.X - P.Z + Base.GMP.MPZ.add!(t2, Q_X, Q_Z) # t2 = Q.X + Q.Z + _mulmod!(t5, t1, t2, n, t3) # t5 = u + + # v = (P.X + P.Z) * (Q.X - Q.Z) mod n + Base.GMP.MPZ.add!(t1, P_X, P_Z) # t1 = P.X + P.Z + Base.GMP.MPZ.sub!(t2, Q_X, Q_Z) # t2 = Q.X - Q.Z + _mulmod!(t6, t1, t2, n, t3) # t6 = v + + # add = u + v, sub = u - v + Base.GMP.MPZ.add!(t1, t5, t6) # t1 = add = u + v + Base.GMP.MPZ.sub!(t2, t5, t6) # t2 = sub = u - v + + # X = diff.Z * add^2 mod n + _mulmod!(t3, t1, t1, n, t4) # t3 = add^2 mod n + _mulmod!(res_X, diff_Z, t3, n, t4) # res_X = diff.Z * add^2 mod n + + # Z = diff.X * sub^2 mod n + _mulmod!(t3, t2, t2, n, t4) # t3 = sub^2 mod n + _mulmod!(res_Z, diff_X, t3, n, t4) # res_Z = diff.X * sub^2 mod n +end + +""" +In-place point doubling on Montgomery curve with parameter a24 = (a+2)/4. +""" +function _ecm_double!(res_X::BigInt, res_Z::BigInt, + P_X::BigInt, P_Z::BigInt, + n::BigInt, a24::BigInt, buf::ECMBuffers) + t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6 + # u = (P.X + P.Z)^2 mod n + Base.GMP.MPZ.add!(t1, P_X, P_Z) # t1 = P.X + P.Z + _mulmod!(t5, t1, t1, n, t3) # t5 = u = (P.X+P.Z)^2 mod n + + # v = (P.X - P.Z)^2 mod n + Base.GMP.MPZ.sub!(t1, P_X, P_Z) # t1 = P.X - P.Z + _mulmod!(t6, t1, t1, n, t3) # t6 = v = (P.X-P.Z)^2 mod n + + # diff = u - v + Base.GMP.MPZ.sub!(t1, t5, t6) # t1 = diff = u - v + + # X = u * v mod n + _mulmod!(res_X, t5, t6, n, t3) # res_X = u * v mod n + + # Z = diff * (v + a24 * diff) mod n + _mulmod!(t2, a24, t1, n, t3) # t2 = a24 * diff mod n + Base.GMP.MPZ.add!(t2, t6) # t2 = v + a24 * diff + _mulmod!(res_Z, t1, t2, n, t3) # res_Z = diff * (v + a24*diff) mod n +end + +""" +Montgomery ladder scalar multiplication: compute [k]P on Montgomery curve. +Uses preallocated buffers to avoid allocation in the inner loop. +Returns the point [k]P as (res_X, res_Z). +""" +function _ecm_scalar_mul!(res_X::BigInt, res_Z::BigInt, + k::BigInt, P_X::BigInt, P_Z::BigInt, + n::BigInt, a24::BigInt, buf::ECMBuffers) + R0_X, R0_Z = buf.R0_X, buf.R0_Z + R1_X, R1_Z = buf.R1_X, buf.R1_Z + tmp_X, tmp_Z = buf.tmp_X, buf.tmp_Z + + # R0 = P, R1 = 2P + Base.GMP.MPZ.set!(R0_X, P_X) + Base.GMP.MPZ.set!(R0_Z, P_Z) + _ecm_double!(R1_X, R1_Z, P_X, P_Z, n, a24, buf) + + bits = ndigits(k, base=2) + for i in (bits - 2):-1:0 + if isodd(k >> i) + _ecm_add!(tmp_X, tmp_Z, R0_X, R0_Z, R1_X, R1_Z, P_X, P_Z, n, buf) + Base.GMP.MPZ.set!(R0_X, tmp_X) + Base.GMP.MPZ.set!(R0_Z, tmp_Z) + _ecm_double!(tmp_X, tmp_Z, R1_X, R1_Z, n, a24, buf) + Base.GMP.MPZ.set!(R1_X, tmp_X) + Base.GMP.MPZ.set!(R1_Z, tmp_Z) + else + _ecm_add!(tmp_X, tmp_Z, R0_X, R0_Z, R1_X, R1_Z, P_X, P_Z, n, buf) + Base.GMP.MPZ.set!(R1_X, tmp_X) + Base.GMP.MPZ.set!(R1_Z, tmp_Z) + _ecm_double!(tmp_X, tmp_Z, R0_X, R0_Z, n, a24, buf) + Base.GMP.MPZ.set!(R0_X, tmp_X) + Base.GMP.MPZ.set!(R0_Z, tmp_Z) + end + end + Base.GMP.MPZ.set!(res_X, R0_X) + Base.GMP.MPZ.set!(res_Z, R0_Z) +end + +""" + ecm_factor(n::BigInt, B1::Int, num_curves::Int) -> Union{BigInt, Nothing} + +Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method. +Computes [m]P where m = lcm(1..B1) = prod(p^floor(log_p(B1)) for p prime ≤ B1). +Uses batched gcd (accumulate Z coordinates, check periodically) to reduce gcd calls. +Returns a factor or `nothing` if none found within the curve budget. +""" +function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} + # Precompute prime powers for Stage 1 + prime_powers = BigInt[] + for p in primes(B1) + pk = BigInt(p) + while pk * p <= B1 + pk *= p + end + push!(prime_powers, pk) + end + + buf = ECMBuffers() + Q_X = BigInt() + Q_Z = BigInt() + tmp_mul = BigInt() # scratch for acc * Q.Z + + for _ in 1:num_curves + # Generate random curve via σ parameter (Suyama's parametrization) + σ = BigInt(rand(6:10^9)) + u = mod(σ * σ - 5, n) + v = mod(4 * σ, n) + x0 = mod(u * u * u, n) + z0 = mod(v * v * v, n) + + vu_diff = mod(v - u, n) + a24_num = mod(vu_diff^3 * mod(3 * u + v, n), n) + a24_den = mod(16 * x0 * v, n) + + g = gcd(a24_den, n) + if g > 1 && g < n + return g + end + if g == n + continue + end + + a24_den_inv = invmod(a24_den, n) + a24 = mod(a24_num * a24_den_inv, n) + + Base.GMP.MPZ.set!(Q_X, x0) + Base.GMP.MPZ.set!(Q_Z, z0) + + # Stage 1: multiply Q by each prime power, with batched gcd + degenerate = false + acc = BigInt(1) + batch_count = 0 + for pk in prime_powers + _ecm_scalar_mul!(Q_X, Q_Z, pk, Q_X, Q_Z, n, a24, buf) + Base.GMP.MPZ.mul!(tmp_mul, acc, Q_Z) + _mpz_fdiv_r!(acc, tmp_mul, n) + batch_count += 1 + + if batch_count >= 100 + g = gcd(acc, n) + if g > 1 && g < n + return g + end + if g == n + degenerate = true + break + end + Base.GMP.MPZ.set_si!(acc, 1) + batch_count = 0 + end + end + + degenerate && continue + + if batch_count > 0 + g = gcd(acc, n) + if g > 1 && g < n + return g + end + end + end + return nothing +end diff --git a/src/mpqs.jl b/src/mpqs.jl new file mode 100644 index 0000000..1fd3080 --- /dev/null +++ b/src/mpqs.jl @@ -0,0 +1,1147 @@ +# Multiple Polynomial Quadratic Sieve (MPQS) for integer factorization +# Ref: Silverman (1987) "The Multiple Polynomial Quadratic Sieve" +# Ref: Pomerance (1982) "Analysis and Comparison of Some Integer Factoring Algorithms" +# Ref: Knuth & Trabb Pardo (1976) "Analysis of a Simple Factorization Algorithm" +# Ref: Crandall & Pomerance (2005) "Prime Numbers: A Computational Perspective", Ch.6 + +""" +Context for MPQS factorization. +""" +mutable struct MPQSContext + n::BigInt # kn (multiplied) + n_orig::BigInt # original n + k::Int # Knuth multiplier + factor_base::Vector{Int} + fb_size::Int + sqrt_kn_mod::Vector{Int} + log_primes::Vector{UInt8} + sieve_interval::Int +end + +""" +A smooth or partially-smooth relation found during sieving. +large_prime == 0: fully smooth relation +large_prime > 0, large_prime2 == 0: single large prime partial +large_prime > 0, large_prime2 > 0: double large prime partial (large_prime < large_prime2) +""" +struct SmoothRelation + ax_plus_b::BigInt # ax + b + Q_val::BigInt # Q(x) = (ax+b)² - kn + exponents::BitVector # parity of exponents over factor base + full_exp::Vector{Int32} # actual exponent counts of g(x) over factor base + a_indices::Vector{Int} # FB indices of primes composing polynomial `a` + large_prime::Int # first unfactored prime (0 if fully smooth) + large_prime2::Int # second unfactored prime (0 if single LP or smooth) +end + +""" +Represents an MPQS polynomial Q(x) = (ax + b)² - kn. +""" +struct MPQSPolynomial + a::BigInt + b::BigInt + a_factors::Vector{Int} # indices into factor base for primes composing a +end + +# Precomputed table: digit count → (fb_size, sieve_interval) +# Ref: Silverman (1987), Crandall & Pomerance (2005) Ch.6 +# Standard MPQS parameters: factor base sizes from L-smoothness formula. +# Large prime variation significantly amplifies effective relation yield. +# Ref: L-smoothness formula calibrated against reference implementations +const _MPQS_PARAMS = [ + (digits=30, fb_size=200, sieve_interval=55000), + (digits=35, fb_size=400, sieve_interval=55000), + (digits=40, fb_size=700, sieve_interval=55000), + (digits=45, fb_size=1200, sieve_interval=55000), + (digits=50, fb_size=1900, sieve_interval=66000), + (digits=55, fb_size=2900, sieve_interval=66000), + (digits=58, fb_size=3700, sieve_interval=95000), + (digits=60, fb_size=5500, sieve_interval=250000), + (digits=62, fb_size=6200, sieve_interval=250000), + (digits=65, fb_size=7500, sieve_interval=250000), + (digits=68, fb_size=9000, sieve_interval=250000), + (digits=70, fb_size=10000, sieve_interval=250000), + (digits=73, fb_size=13000, sieve_interval=250000), + (digits=76, fb_size=16000, sieve_interval=250000), +] + +""" +Select MPQS parameters (factor base size, sieve interval) based on digit count. +""" +function _mpqs_select_params(n::BigInt) + d = ndigits(n) + # Find bracketing entries and interpolate + if d <= _MPQS_PARAMS[1].digits + return _MPQS_PARAMS[1].fb_size, _MPQS_PARAMS[1].sieve_interval + end + if d >= _MPQS_PARAMS[end].digits + return _MPQS_PARAMS[end].fb_size, _MPQS_PARAMS[end].sieve_interval + end + for i in 1:(length(_MPQS_PARAMS) - 1) + lo, hi = _MPQS_PARAMS[i], _MPQS_PARAMS[i + 1] + if lo.digits <= d <= hi.digits + t = (d - lo.digits) / (hi.digits - lo.digits) + fb = round(Int, lo.fb_size + t * (hi.fb_size - lo.fb_size)) + si = round(Int, lo.sieve_interval + t * (hi.sieve_interval - lo.sieve_interval)) + return fb, si + end + end + return _MPQS_PARAMS[end].fb_size, _MPQS_PARAMS[end].sieve_interval +end + +""" +Select optimal Knuth multiplier for MPQS. +Scores k ∈ {1,3,5,...,47} using Silverman's formula. +Ref: Silverman (1987) §4 +""" +function _select_knuth_multiplier(n::BigInt)::Int + best_k = 1 + best_score = -Inf + small_primes = primes(200) + + for k in 1:2:47 + kn = BigInt(k) * n + score = 0.0 + + # Special handling for p=2 + kn_mod8 = Int(_mpz_fdiv_ui(kn, UInt(8))) + if kn_mod8 == 1 + score += 2 * log(2.0) + elseif kn_mod8 == 5 + score += log(2.0) + end + + # Score odd primes + for p in small_primes + p == 2 && continue + if mod(k, p) == 0 + score += log(Float64(p)) / p + elseif powermod(kn, div(p - 1, 2), p) == 1 # Legendre(kn, p) == 1 + score += 2 * log(Float64(p)) / p + end + end + + # Penalize large k slightly + score -= 0.5 * log(Float64(k)) + + if score > best_score + best_score = score + best_k = k + end + end + return best_k +end + +""" +Build factor base: primes p where Legendre(kn, p) == 1, plus p=2. +Also computes sqrt(kn) mod p and log₂(p) for each prime. +""" +function _build_factor_base(kn::BigInt, fb_size_target::Int) + factor_base = Int[2] + sqrt_kn_mod = Int[mod(kn, 2) == 0 ? 0 : 1] + + p = 3 + while length(factor_base) < fb_size_target + if isprime(p) && powermod(kn, div(p - 1, 2), p) == 1 + push!(factor_base, p) + # Compute sqrt(kn) mod p using Tonelli-Shanks (result < p, fits in Int) + push!(sqrt_kn_mod, Int(_tonelli_shanks(mod(kn, p), p))) + end + p += 2 + end + + log_primes = UInt8[floor(UInt8, log2(p)) for p in factor_base] + + return factor_base, sqrt_kn_mod, log_primes +end + +""" +Tonelli-Shanks algorithm for computing square root mod p. +Returns r such that r² ≡ n (mod p). +""" +function _tonelli_shanks(n::BigInt, p::Int)::BigInt + n = mod(n, p) + n == 0 && return BigInt(0) + p == 2 && return BigInt(n) + + # Factor out powers of 2 from p-1 + q = p - 1 + s = 0 + while iseven(q) + q >>= 1 + s += 1 + end + + if s == 1 + # p ≡ 3 (mod 4) + return powermod(BigInt(n), div(p + 1, 4), p) + end + + # Find a non-residue z + z = BigInt(2) + while powermod(z, div(p - 1, 2), p) != p - 1 + z += 1 + end + + M = s + c = powermod(z, q, p) + t = powermod(BigInt(n), q, p) + R = powermod(BigInt(n), div(q + 1, 2), p) + + while true + t == 1 && return R + # Find least i such that t^(2^i) ≡ 1 (mod p) + i = 0 + temp = t + while temp != 1 + temp = mod(temp * temp, p) + i += 1 + end + b = powermod(c, BigInt(1) << (M - i - 1), p) + M = i + c = mod(b * b, p) + t = mod(t * c, p) + R = mod(R * b, p) + end +end + +""" +Generate an MPQS polynomial Q(x) = (ax+b)² - kn. +`a` is a product of factor base primes, chosen so that a ≈ √(2kn)/M. +""" +function _generate_polynomial(ctx::MPQSContext, used_a_sets::Set{Vector{Int}})::Union{MPQSPolynomial, Nothing} + fb = ctx.factor_base + fb_size = ctx.fb_size + kn = ctx.n + target_a = isqrt(2 * kn) ÷ ctx.sieve_interval + + # Determine how many primes we need based on target_a and typical prime size + # Use primes from the upper portion of the factor base + # Start search from position fb_size/3 to fb_size (avoid very small primes) + lo = max(5, fb_size ÷ 3) + hi = fb_size + + # Estimate number of factors needed: target_a / prod(typical_prime) + typical_log_p = log(Float64(fb[div(lo + hi, 2)])) + num_facs = max(2, round(Int, log(Float64(target_a)) / typical_log_p)) + # Allow ±1 variation + num_facs_range = max(2, num_facs - 1):min(fb_size ÷ 2, num_facs + 1) + + best_a = BigInt(0) + best_indices = Int[] + best_diff = BigInt(10)^200 + + for _ in 1:100 + nf = rand(num_facs_range) + indices = sort(rand(lo:hi, nf)) + length(unique(indices)) == nf || continue + indices = unique(indices) + sort!(indices) + length(indices) == nf || continue + indices in used_a_sets && continue + + a = prod(BigInt(fb[i]) for i in indices) + diff = abs(a - target_a) + if diff < best_diff + best_diff = diff + best_a = a + best_indices = indices + end + end + + isempty(best_indices) && return nothing + push!(used_a_sets, best_indices) + + a = best_a + # Compute b using CRT: b² ≡ kn (mod a) + b = _compute_b(kn, a, best_indices, fb, ctx.sqrt_kn_mod) + + return MPQSPolynomial(a, b, best_indices) +end + +""" +Compute b such that b² ≡ kn (mod a) using CRT from factor base roots. +""" +function _compute_b(kn::BigInt, a::BigInt, a_indices::Vector{Int}, + fb::Vector{Int}, sqrt_kn_mod::Vector{Int})::BigInt + # Use CRT to combine sqrt(kn) mod p_i for each prime in a + b = BigInt(0) + for idx in a_indices + p = BigInt(fb[idx]) + r = sqrt_kn_mod[idx] + a_div_p = a ÷ p + # CRT: b += r * a/p * invmod(a/p, p) mod a + inv_a_p = invmod(mod(a_div_p, p), p) + b = mod(b + r * a_div_p * inv_a_p, a) + end + # Ensure b² ≡ kn (mod a); if not, try a-b + if mod(b * b, a) != mod(kn, a) + b = a - b + end + return b +end + + +""" +In-place unsigned integer division: sets x = x ÷ d, returns remainder. +Uses GMP's mpz_tdiv_q_ui for single-call quotient+remainder. +""" +function _tdiv_q_ui!(x::BigInt, d::UInt)::UInt + return ccall((:__gmpz_tdiv_q_ui, :libgmp), Culong, + (Ref{BigInt}, Ref{BigInt}, Culong), x, x, d) +end + +""" +In-place absolute value: sets x = |x|. +""" +function _mpz_abs!(x::BigInt, src::BigInt) + ccall((:__gmpz_abs, :libgmp), Cvoid, (Ref{BigInt}, Ref{BigInt}), x, src) +end + +""" +Compute quotient into `q` and return remainder, without modifying `n`. +Uses GMP's mpz_tdiv_q_ui: sets q = n ÷ d, returns n mod d. +""" +function _tdiv_q_ui_into!(q::BigInt, n::BigInt, d::UInt)::UInt + return ccall((:__gmpz_tdiv_q_ui, :libgmp), Culong, + (Ref{BigInt}, Ref{BigInt}, Culong), q, n, d) +end + +""" +Compute n mod d without allocating a BigInt for the result. +Returns the remainder as a Culong (UInt). Uses GMP's mpz_fdiv_ui. +""" +function _mpz_fdiv_ui(n::BigInt, d::Culong)::Culong + return ccall((:__gmpz_fdiv_ui, :libgmp), Culong, (Ref{BigInt}, Culong), n, d) +end + +""" +Root-guided trial factoring with preallocated buffers. +`exponents` and `full_exp` are zeroed and filled in-place to avoid allocation. +`remainder` is a preallocated BigInt used as scratch space. +Returns a SmoothRelation (copying the buffers) or nothing. +""" +@inline function _trial_factor_guided(ax_b::BigInt, gx::BigInt, Qx::BigInt, ctx::MPQSContext, + large_prime_bound::Int, dlp_bound::Int, dlp_bound_sq::Int, + sieve_pos::Int, + starts1::Vector{Int}, starts2::Vector{Int}, + a_indices::Vector{Int}, + exponents::BitVector, full_exp::Vector{Int32}, + remainder::BigInt, quotient_buf::BigInt)::Union{SmoothRelation, Nothing} + fb = ctx.factor_base + fb_size = ctx.fb_size + + # Reset buffers + fill!(exponents, false) + fill!(full_exp, Int32(0)) + _mpz_abs!(remainder, gx) + + if gx < 0 + exponents[1] = true + full_exp[1] = Int32(1) + end + + early_exit_j = fb_size + 1 # sentinel: no early exit + @inbounds for j in 1:fb_size + p = fb[j] + p_ui = UInt(p) + s1 = starts1[j] + if s1 == 0 + # Prime with no stored sieve position — brute force check + r = _tdiv_q_ui_into!(quotient_buf, remainder, p_ui) + if r == 0 + cnt = Int32(1) + remainder, quotient_buf = quotient_buf, remainder + while true + r = _tdiv_q_ui_into!(quotient_buf, remainder, p_ui) + r != 0 && break + cnt += Int32(1) + remainder, quotient_buf = quotient_buf, remainder + end + full_exp[j + 1] = cnt + exponents[j + 1] = isodd(cnt) + if isone(remainder) + break + elseif remainder < p * p + early_exit_j = j + break + end + end + continue + end + hit = (rem(sieve_pos - s1, p) == 0) + if !hit + s2 = starts2[j] + hit = (s2 != s1) && (rem(sieve_pos - s2, p) == 0) + end + hit || continue + + # p divides g(x) — extract all powers + cnt = Int32(0) + while true + r = _tdiv_q_ui_into!(quotient_buf, remainder, p_ui) + r != 0 && break + cnt += Int32(1) + remainder, quotient_buf = quotient_buf, remainder + end + full_exp[j + 1] = cnt + exponents[j + 1] = isodd(cnt) + if isone(remainder) + break + elseif remainder < p * p + early_exit_j = j + break + end + end + + # Handle early exit: remainder has at most one prime factor + if early_exit_j <= fb_size && !isone(remainder) + rem_int = Int(remainder) + idx = searchsortedfirst(fb, rem_int) + if idx <= fb_size && fb[idx] == rem_int + full_exp[idx + 1] += Int32(1) + exponents[idx + 1] ⊻= true + Base.GMP.MPZ.set_si!(remainder, 1) + end + end + + for ai in a_indices + exponents[ai + 1] ⊻= true + end + + if isone(remainder) + axb_copy = BigInt(); Base.GMP.MPZ.set!(axb_copy, ax_b) + qx_copy = BigInt(); Base.GMP.MPZ.set!(qx_copy, Qx) + return SmoothRelation(axb_copy, qx_copy, copy(exponents), copy(full_exp), a_indices, 0, 0) + elseif remainder <= large_prime_bound && remainder > 1 + lp = Int(remainder) + if isprime(lp) + axb_copy = BigInt(); Base.GMP.MPZ.set!(axb_copy, ax_b) + qx_copy = BigInt(); Base.GMP.MPZ.set!(qx_copy, Qx) + return SmoothRelation(axb_copy, qx_copy, copy(exponents), copy(full_exp), a_indices, lp, 0) + end + elseif remainder <= dlp_bound_sq && remainder > 1 && !isprime(Int(remainder)) + # Double large prime: try to split composite remainder into two primes + f = pollardfactor(remainder) + if f !== nothing + p1 = Int(min(f, div(remainder, f))) + p2 = Int(max(f, div(remainder, f))) + if p1 > 1 && p2 > 1 && p1 <= dlp_bound && p2 <= dlp_bound && isprime(p1) && isprime(p2) + axb_copy = BigInt(); Base.GMP.MPZ.set!(axb_copy, ax_b) + qx_copy = BigInt(); Base.GMP.MPZ.set!(qx_copy, Qx) + return SmoothRelation(axb_copy, qx_copy, copy(exponents), copy(full_exp), a_indices, p1, p2) + end + end + end + + return nothing +end + +""" +Combine two partial relations sharing a large prime `shared_lp`. +The shared LP cancels (appears with even exponent in the product). +Returns a relation with remaining LPs (0, 1, or 2). +""" +function _combine_partials(r1::SmoothRelation, r2::SmoothRelation, + shared_lp::Int, ctx::MPQSContext)::Union{SmoothRelation, Nothing} + combined_exp = r1.exponents .⊻ r2.exponents + combined_full = r1.full_exp .+ r2.full_exp + combined_a_indices = vcat(r1.a_indices, r2.a_indices) + combined_axb = mod(r1.ax_plus_b * r2.ax_plus_b, ctx.n_orig) + combined_Q = r1.Q_val * r2.Q_val + + # Collect remaining LPs (those that aren't the shared one) + remaining = Int[] + for lp in (r1.large_prime, r1.large_prime2, r2.large_prime, r2.large_prime2) + lp == 0 && continue + lp == shared_lp && continue + push!(remaining, lp) + end + + # The shared LP appears in both relations, so its product is lp^2 (even exponent). + # We track it for square root computation. + if isempty(remaining) + return SmoothRelation(combined_axb, combined_Q, combined_exp, combined_full, + combined_a_indices, shared_lp, 0) + elseif length(remaining) == 1 + return SmoothRelation(combined_axb, combined_Q, combined_exp, combined_full, + combined_a_indices, shared_lp, remaining[1]) + end + # Two or more remaining LPs — can't use directly as a full relation + return nothing +end + +""" +GF(2) Gaussian elimination on parity vectors (fallback for small matrices). +Returns dependency sets (indices that XOR to zero). +""" +function _gf2_eliminate_gaussian(relations::Vector{BitVector}, fb_size::Int)::Vector{Vector{Int}} + nrels = length(relations) + nrels == 0 && return Vector{Int}[] + + matrix = [copy(r) for r in relations] + history = [falses(nrels) for _ in 1:nrels] + for i in 1:nrels + history[i][i] = true + end + ncols = length(relations[1]) + pivot_col = zeros(Int, ncols) + + for i in 1:nrels + pivot = findfirst(matrix[i]) + while pivot !== nothing + if pivot_col[pivot] == 0 + pivot_col[pivot] = i + break + else + pr = pivot_col[pivot] + matrix[i] .⊻= matrix[pr] + history[i] .⊻= history[pr] + pivot = findfirst(matrix[i]) + end + end + end + + dependencies = Vector{Int}[] + for i in 1:nrels + if !any(matrix[i]) + push!(dependencies, findall(history[i])) + end + end + return dependencies +end + +""" + _gf2_eliminate(relations, fb_size) -> Vector{Vector{Int}} + +Find GF(2) dependencies using Gaussian elimination. +For the matrix sizes encountered in MPQS (up to ~10K), Gaussian elimination +is faster than Block Lanczos due to lower overhead. +""" +function _gf2_eliminate(relations::Vector{BitVector}, fb_size::Int)::Vector{Vector{Int}} + return _gf2_eliminate_gaussian(relations, fb_size) +end + +""" +Extract a factor from a dependency set using stored exponent vectors. +full_exp stores exponents of g(x) over the factor base (sign in index 1). +a_indices stores which FB primes compose the polynomial's `a` value. +Q(x) = a · g(x), so exp_Q(p) = exp_g(p) + exp_a(p). +""" +function _extract_factor(n_orig::BigInt, kn::BigInt, k::Int, + dependency::Vector{Int}, + relations::Vector{SmoothRelation}, + factor_base::Vector{Int})::Union{BigInt, Nothing} + x = BigInt(1) + fb_size = length(factor_base) + total_exp = zeros(Int, fb_size) + + for idx in dependency + r = relations[idx] + x = mod(x * r.ax_plus_b, n_orig) + + # Add g(x) exponents (indices 2:end of full_exp, index 1 is sign) + for j in 1:fb_size + total_exp[j] += Int(r.full_exp[j + 1]) + end + # Add a exponents: each prime in a_indices contributes exponent 1 + for ai in r.a_indices + total_exp[ai] += 1 + end + end + + # Compute y = ∏ p^(e/2) mod n (all exponents should be even by GF(2) dependency) + y = BigInt(1) + for j in 1:fb_size + e = total_exp[j] + if e > 0 + y = mod(y * powermod(BigInt(factor_base[j]), e ÷ 2, n_orig), n_orig) + end + end + + # Include large prime contributions from combined partial relations. + # Each shared LP appears with even exponent (lp^2), contributing one lp to y. + for idx in dependency + r = relations[idx] + if r.large_prime > 0 + y = mod(y * BigInt(r.large_prime), n_orig) + end + if r.large_prime2 > 0 + y = mod(y * BigInt(r.large_prime2), n_orig) + end + end + + g = gcd(abs(x - y), n_orig) + if g > 1 && g < n_orig + g2 = gcd(g, BigInt(k)) + if g2 > 1 && g2 < g + g = div(g, g2) + end + if g > 1 && g < n_orig && mod(n_orig, g) == 0 + return g + end + end + + g = gcd(abs(x + y), n_orig) + if g > 1 && g < n_orig + g2 = gcd(g, BigInt(k)) + if g2 > 1 && g2 < g + g = div(g, g2) + end + if g > 1 && g < n_orig && mod(n_orig, g) == 0 + return g + end + end + + return nothing +end + +""" +Generate SIQS `a` value and CRT components for multiple b-values. +Returns (a, a_indices, B_components) where B_components[j] is the CRT contribution +from the j-th prime factor of a. This allows generating 2^(s-1) b-values. +""" +function _generate_siqs_a(ctx::MPQSContext, used_a_sets::Set{Vector{Int}}) + fb = ctx.factor_base + fb_size = ctx.fb_size + kn = ctx.n + target_a = isqrt(2 * kn) ÷ ctx.sieve_interval + + lo = max(5, fb_size ÷ 3) + hi = fb_size + typical_log_p = log(Float64(fb[div(lo + hi, 2)])) + num_facs = max(2, round(Int, log(Float64(target_a)) / typical_log_p)) + num_facs_range = max(2, num_facs - 1):min(fb_size ÷ 2, num_facs + 1) + + best_a = BigInt(0) + best_indices = Int[] + best_diff = BigInt(10)^200 + + for _ in 1:100 + nf = rand(num_facs_range) + indices = sort(rand(lo:hi, nf)) + length(unique(indices)) == nf || continue + indices = unique(indices) + sort!(indices) + length(indices) == nf || continue + indices in used_a_sets && continue + + a = prod(BigInt(fb[i]) for i in indices) + diff = abs(a - target_a) + if diff < best_diff + best_diff = diff + best_a = a + best_indices = indices + end + end + + isempty(best_indices) && return nothing + push!(used_a_sets, best_indices) + + a = best_a + s = length(best_indices) + + # Compute CRT components B_j via Hensel lifting: + # For each prime q_j | a, B_j = sqrt(kn) mod q_j * (a/q_j) * inv(a/q_j, q_j) mod a + B_components = Vector{BigInt}(undef, s) + for (j, idx) in enumerate(best_indices) + q = BigInt(fb[idx]) + r = ctx.sqrt_kn_mod[idx] + a_div_q = a ÷ q + inv_a_q = invmod(mod(a_div_q, q), q) + B_components[j] = mod(r * a_div_q * inv_a_q, a) + end + + return (a, best_indices, B_components) +end + +""" +Precompute inv(a) mod p for each factor base prime (done once per a value). +""" +function _precompute_inv_a(a::BigInt, factor_base::Vector{Int})::Vector{Int} + fb_size = length(factor_base) + inv_a = Vector{Int}(undef, fb_size) + @inbounds for j in 1:fb_size + p = factor_base[j] + a_mod_p = Int(_mpz_fdiv_ui(a, UInt(p))) + if a_mod_p == 0 + inv_a[j] = 0 # p divides a + else + inv_a[j] = invmod(a_mod_p, p) + end + end + return inv_a +end + +""" +Compute initial sieve root offsets from b (requires BigInt mod, done once per a-value). +offset1[j], offset2[j] ∈ [0, p-1]: sieve positions are offset+1, offset+1+p, ... +Use -1 as sentinel for "no root" (p | a and 2b ≡ 0 mod p). +""" +function _compute_siqs_roots!(offset1::Vector{Int}, offset2::Vector{Int}, + b::BigInt, a::BigInt, kn::BigInt, + inv_a::Vector{Int}, + sqrt_kn_mod::Vector{Int}, + factor_base::Vector{Int}, + fb_size::Int, M::Int, a_indices::Vector{Int}) + c = div(b * b - kn, a) + @inbounds for j in 1:fb_size + p = factor_base[j] + if inv_a[j] == 0 + b2_mod_p = mod(2 * Int(_mpz_fdiv_ui(b, UInt(p))), p) + c_mod_p = Int(_mpz_fdiv_ui(c, UInt(p))) + if b2_mod_p == 0 + offset1[j] = -1; offset2[j] = -1 + else + b_inv = invmod(b2_mod_p, p) + r = mod(-c_mod_p * b_inv, p) + o = mod(r + M, p) + offset1[j] = o; offset2[j] = o + end + else + sqr = sqrt_kn_mod[j] + b_mod_p = Int(_mpz_fdiv_ui(b, UInt(p))) + ai = inv_a[j] + r1 = mod((sqr - b_mod_p) * ai, p) + r2 = mod((-sqr - b_mod_p) * ai, p) + offset1[j] = mod(r1 + M, p) + offset2[j] = mod(r2 + M, p) + end + end +end + +""" +Fast SIQS sieve: constant initialization (fill!) + unclamped subtraction + small prime skipping. +""" +function _siqs_sieve!(sieve::Vector{UInt8}, sieve_len::Int, + offset1::Vector{Int}, offset2::Vector{Int}, + starts1::Vector{Int}, starts2::Vector{Int}, + factor_base::Vector{Int}, log_primes::Vector{UInt8}, + fb_size::Int, sieve_start_idx::Int, log_init::UInt8) + fill!(sieve, log_init) + + @inbounds for j in 1:fb_size + if offset1[j] < 0 + starts1[j] = 0; starts2[j] = 0 + else + starts1[j] = offset1[j] + 1 + starts2[j] = offset2[j] + 1 + end + end + + @inbounds for j in sieve_start_idx:fb_size + p = factor_base[j] + logp = log_primes[j] + s1 = starts1[j] + s1 <= 0 && continue + + s2 = starts2[j] + if s2 != s1 && s2 > 0 + # Two distinct roots: interleave writes for memory-level parallelism + pos1 = s1 + pos2 = s2 + while pos1 <= sieve_len && pos2 <= sieve_len + sieve[pos1] -= logp + sieve[pos2] -= logp + pos1 += p + pos2 += p + end + while pos1 <= sieve_len + sieve[pos1] -= logp + pos1 += p + end + while pos2 <= sieve_len + sieve[pos2] -= logp + pos2 += p + end + else + # Single root (p | a) + pos = s1 + while pos <= sieve_len + sieve[pos] -= logp + pos += p + end + end + end +end + +""" +Collect smooth candidates from sieve. Candidates are positions where the sieve +value underflowed (>= 0x80), indicating sufficient factorization over the factor base. +""" +function _siqs_collect!(poly::MPQSPolynomial, ctx::MPQSContext, + sieve::Vector{UInt8}, starts1::Vector{Int}, starts2::Vector{Int}, + relations::Vector{SmoothRelation}, + partial_relations::Dict{Int, SmoothRelation}, + M::Int, large_prime_bound::Int, dlp_bound::Int, dlp_bound_sq::Int, + tf_exponents::BitVector, tf_full_exp::Vector{Int32}, + tf_remainder::BigInt, tf_quotient::BigInt, + tf_ax_b::BigInt, tf_Qx::BigInt, tf_gx::BigInt) + a, b = poly.a, poly.b + kn = ctx.n + sieve_len = length(sieve) + + # Vectorized sieve scanning: process 8 bytes at a time via reinterpret + num_chunks = div(sieve_len, 8) + body_len = num_chunks * 8 + + if num_chunks > 0 + sieve_body = @view sieve[1:body_len] + sieve_chunks = reinterpret(UInt64, sieve_body) + + @inbounds for j in 1:num_chunks + chunk = sieve_chunks[j] + # The bitmask trick: check if any of the 8 bytes have the high bit set + if chunk & 0x8080808080808080 != 0 + for k in 1:8 + idx = (j - 1) * 8 + k + if sieve[idx] >= 0x80 + _process_candidate!(idx, a, b, kn, M, ctx, + large_prime_bound, dlp_bound, dlp_bound_sq, + starts1, starts2, poly.a_factors, + tf_exponents, tf_full_exp, tf_remainder, tf_quotient, + tf_ax_b, tf_Qx, tf_gx, + relations, partial_relations) + end + end + end + end + end + + # Handle the tail (remaining 1 to 7 bytes that didn't fit in a UInt64) + @inbounds for i in (body_len + 1):sieve_len + if sieve[i] >= 0x80 + _process_candidate!(i, a, b, kn, M, ctx, + large_prime_bound, dlp_bound, dlp_bound_sq, + starts1, starts2, poly.a_factors, + tf_exponents, tf_full_exp, tf_remainder, tf_quotient, + tf_ax_b, tf_Qx, tf_gx, + relations, partial_relations) + end + end +end + +""" +Process a single sieve candidate at position `i`. +""" +@inline function _process_candidate!(i::Int, a::BigInt, b::BigInt, kn::BigInt, M::Int, + ctx::MPQSContext, + large_prime_bound::Int, dlp_bound::Int, dlp_bound_sq::Int, + starts1::Vector{Int}, starts2::Vector{Int}, + a_factors::Vector{Int}, + tf_exponents::BitVector, tf_full_exp::Vector{Int32}, + tf_remainder::BigInt, tf_quotient::BigInt, + tf_ax_b::BigInt, tf_Qx::BigInt, tf_gx::BigInt, + relations::Vector{SmoothRelation}, + partial_relations::Dict{Int, SmoothRelation}) + x = i - M - 1 + Base.GMP.MPZ.mul_si!(tf_ax_b, a, x) + Base.GMP.MPZ.add!(tf_ax_b, b) + Base.GMP.MPZ.mul!(tf_Qx, tf_ax_b, tf_ax_b) + Base.GMP.MPZ.sub!(tf_Qx, kn) + iszero(tf_Qx) && return + Base.GMP.MPZ.tdiv_q!(tf_gx, tf_Qx, a) + + relation = _trial_factor_guided(tf_ax_b, tf_gx, tf_Qx, ctx, + large_prime_bound, dlp_bound, dlp_bound_sq, + i, starts1, starts2, + a_factors, + tf_exponents, tf_full_exp, tf_remainder, tf_quotient) + relation === nothing && return + + if relation.large_prime == 0 && relation.large_prime2 == 0 + # Fully smooth + push!(relations, relation) + elseif relation.large_prime2 == 0 + # Single large prime partial + lp = relation.large_prime + if haskey(partial_relations, lp) + other = partial_relations[lp] + combined = _combine_partials(relation, other, lp, ctx) + if combined !== nothing + if combined.large_prime2 == 0 + push!(relations, combined) + end + # If combined still has 2 LPs, discard (too complex to chain) + end + delete!(partial_relations, lp) + else + partial_relations[lp] = relation + end + else + # Double large prime partial — try to combine with existing single partials + lp1, lp2 = relation.large_prime, relation.large_prime2 + if haskey(partial_relations, lp1) + other = partial_relations[lp1] + combined = _combine_partials(relation, other, lp1, ctx) + delete!(partial_relations, lp1) + if combined !== nothing + if combined.large_prime2 == 0 + # Fully resolved — push as full relation + push!(relations, combined) + else + # One remaining LP — store as single-LP partial + remaining_lp = combined.large_prime2 + if haskey(partial_relations, remaining_lp) + other2 = partial_relations[remaining_lp] + combined2 = _combine_partials(combined, other2, remaining_lp, ctx) + if combined2 !== nothing && combined2.large_prime2 == 0 + push!(relations, combined2) + end + delete!(partial_relations, remaining_lp) + else + partial_relations[remaining_lp] = combined + end + end + end + elseif haskey(partial_relations, lp2) + other = partial_relations[lp2] + combined = _combine_partials(relation, other, lp2, ctx) + delete!(partial_relations, lp2) + if combined !== nothing + if combined.large_prime2 == 0 + push!(relations, combined) + else + remaining_lp = combined.large_prime2 + if haskey(partial_relations, remaining_lp) + other2 = partial_relations[remaining_lp] + combined2 = _combine_partials(combined, other2, remaining_lp, ctx) + if combined2 !== nothing && combined2.large_prime2 == 0 + push!(relations, combined2) + end + delete!(partial_relations, remaining_lp) + else + partial_relations[remaining_lp] = combined + end + end + end + else + # Store indexed by smaller LP for future matching + partial_relations[lp1] = relation + end + end +end + +""" + mpqs_factor(n::BigInt) -> BigInt + +Factor `n` using the Self-Initializing Quadratic Sieve (SIQS variant of MPQS). +Uses constant sieve initialization, unclamped subtraction, small prime skipping, +and incremental Gray code root updates for performance. +Returns a non-trivial factor of `n`. +""" +function mpqs_factor(n::BigInt)::BigInt + k = _select_knuth_multiplier(n) + kn = BigInt(k) * n + + fb_size_target, sieve_interval = _mpqs_select_params(n) + factor_base, sqrt_kn_mod, log_primes = _build_factor_base(kn, fb_size_target) + actual_fb_size = length(factor_base) + + ctx = MPQSContext(kn, n, k, factor_base, actual_fb_size, + sqrt_kn_mod, log_primes, sieve_interval) + + relations = SmoothRelation[] + partial_relations = Dict{Int, SmoothRelation}() + used_a_sets = Set{Vector{Int}}() + target_relations = actual_fb_size + 50 + + M = sieve_interval + sieve_len = 2 * M + 1 + + # Preallocate buffers (reused across all polynomials) + sieve = Vector{UInt8}(undef, sieve_len) + starts1 = Vector{Int}(undef, actual_fb_size) + starts2 = Vector{Int}(undef, actual_fb_size) + offset1 = Vector{Int}(undef, actual_fb_size) + offset2 = Vector{Int}(undef, actual_fb_size) + + # Preallocate trial factoring buffers (reused across all candidates) + tf_exponents = falses(actual_fb_size + 1) + tf_full_exp = zeros(Int32, actual_fb_size + 1) + tf_remainder = BigInt(0) + tf_quotient = BigInt(0) + tf_ax_b = BigInt(0) + tf_Qx = BigInt(0) + tf_gx = BigInt(0) + + # Constant sieve init: candidates are detected by UInt8 underflow (>= 0x80). + # log_init calibrated to match reference implementation threshold. + # nbits ≈ log2(sqrt(2kn)), up_to = 1.5 tolerance factor + nbits = ndigits(isqrt(2 * kn), base=2) + logp_max = floor(Int, log2(Float64(factor_base[end]))) + up_to = 1.5 + log_init = UInt8(clamp(nbits - round(Int, up_to * logp_max), 1, 127)) + + # Skip small primes in sieve (they hit too many positions relative to their + # log contribution). They are still checked during trial factoring. + # Ref: skip primes < 2^4 when nbits < 90, < 2^5 when 90-120, < 2^6 when > 120 + skip_limit = nbits > 120 ? 512 : (nbits > 90 ? 256 : (nbits > 78 ? 128 : 32)) + sieve_start_idx = 1 + while sieve_start_idx <= actual_fb_size && factor_base[sieve_start_idx] < skip_limit + sieve_start_idx += 1 + end + + # Large prime bounds + p_max = factor_base[end] + large_prime_bound = (2 + (p_max >> 16)) * p_max * floor(Int, log2(Float64(p_max))) + # Double large prime: accept remainder = p1 * p2 where both < dlp_bound + dlp_bound = p_max * 100 + dlp_bound_sq = dlp_bound * dlp_bound + + max_a_values = 5000 + done = false + + # Preallocate B_delta arrays (reused across a-values) + max_s = 10 + B_delta = [Vector{Int}(undef, actual_fb_size) for _ in 1:max_s] + inv_a = Vector{Int}(undef, actual_fb_size) + + for _ in 1:max_a_values + done && break + + result = _generate_siqs_a(ctx, used_a_sets) + result === nothing && continue + a, a_indices, B_comps = result + s = length(a_indices) + + # Precompute inv(a) mod p using factored form (avoids GMP BigInt mod) + # a = prod(factor_base[idx] for idx in a_indices) + @inbounds for j in 1:actual_fb_size + p = factor_base[j] + a_mod_p = 1 + for idx in a_indices + if factor_base[idx] == p + a_mod_p = 0 + break + end + a_mod_p = mod(a_mod_p * mod(factor_base[idx], p), p) + end + if a_mod_p == 0 + inv_a[j] = 0 + else + inv_a[j] = invmod(a_mod_p, p) + end + end + + # Precompute root deltas for incremental Gray code b-switching + # B_delta[v][j] = 2 * B_v mod p * inv_a[j] mod p + for v in 1:s + Bv = B_comps[v] + delta = B_delta[v] + @inbounds for j in 1:actual_fb_size + p = factor_base[j] + Bv_mod_p = Int(_mpz_fdiv_ui(Bv, UInt(p))) + delta[j] = mod(2 * Bv_mod_p * inv_a[j], p) + end + end + + # Initial b (all positive CRT signs) + b = mod(sum(B_comps), a) + if mod(b * b, a) != mod(kn, a) + b = a - b + end + + # Compute initial roots (BigInt mod, once per a) + _compute_siqs_roots!(offset1, offset2, b, a, kn, inv_a, + sqrt_kn_mod, factor_base, actual_fb_size, M, a_indices) + + # First polynomial + _siqs_sieve!(sieve, sieve_len, offset1, offset2, starts1, starts2, + factor_base, log_primes, actual_fb_size, sieve_start_idx, log_init) + poly = MPQSPolynomial(a, b, a_indices) + _siqs_collect!(poly, ctx, sieve, starts1, starts2, + relations, partial_relations, M, large_prime_bound, + dlp_bound, dlp_bound_sq, + tf_exponents, tf_full_exp, tf_remainder, tf_quotient, + tf_ax_b, tf_Qx, tf_gx) + + if length(relations) >= target_relations + break + end + + # Remaining b-values via Gray code incremental root update + num_b = 1 << (s - 1) + for i in 2:num_b + gray_prev = (i - 2) ⊻ ((i - 2) >> 1) + gray_curr = (i - 1) ⊻ ((i - 1) >> 1) + flip_bit = trailing_zeros(gray_prev ⊻ gray_curr) + 1 + + if (gray_curr >> (flip_bit - 1)) & 1 == 1 + b = mod(b - 2 * B_comps[flip_bit], a) + dd = +1 # roots shift right + else + b = mod(b + 2 * B_comps[flip_bit], a) + dd = -1 # roots shift left + end + + # Incremental root update (pure Int arithmetic, no BigInt mod) + delta = B_delta[flip_bit] + @inbounds for j in 1:actual_fb_size + inv_a[j] == 0 && continue + p = factor_base[j] + d = delta[j] + if dd > 0 + o1 = offset1[j] + d; if o1 >= p; o1 -= p; end + offset1[j] = o1 + o2 = offset2[j] + d; if o2 >= p; o2 -= p; end + offset2[j] = o2 + else + o1 = offset1[j] - d; if o1 < 0; o1 += p; end + offset1[j] = o1 + o2 = offset2[j] - d; if o2 < 0; o2 += p; end + offset2[j] = o2 + end + end + + # Recompute roots for primes dividing a (~s primes, needs BigInt) + c = div(b * b - kn, a) + for idx in a_indices + p = factor_base[idx] + b2_mod_p = mod(2 * Int(_mpz_fdiv_ui(b, UInt(p))), p) + c_mod_p = Int(_mpz_fdiv_ui(c, UInt(p))) + if b2_mod_p == 0 + offset1[idx] = -1; offset2[idx] = -1 + else + b_inv = invmod(b2_mod_p, p) + r = mod(-c_mod_p * b_inv, p) + o = mod(r + M, p) + offset1[idx] = o; offset2[idx] = o + end + end + + # Sieve and collect + _siqs_sieve!(sieve, sieve_len, offset1, offset2, starts1, starts2, + factor_base, log_primes, actual_fb_size, sieve_start_idx, log_init) + poly = MPQSPolynomial(a, b, a_indices) + _siqs_collect!(poly, ctx, sieve, starts1, starts2, + relations, partial_relations, M, large_prime_bound, + dlp_bound, dlp_bound_sq, + tf_exponents, tf_full_exp, tf_remainder, tf_quotient, + tf_ax_b, tf_Qx, tf_gx) + + if length(relations) >= target_relations + done = true + break + end + end + end + + if length(relations) < actual_fb_size + 1 + error("failed to factor $n: insufficient smooth relations ($(length(relations)) found, need $(actual_fb_size + 1))") + end + + # GF(2) elimination + parity_vectors = [r.exponents for r in relations] + dependencies = _gf2_eliminate(parity_vectors, actual_fb_size) + + # Try each dependency to find a non-trivial factor + for dep in dependencies + result = _extract_factor(n, kn, k, dep, relations, factor_base) + if result !== nothing + return result + end + end + + error("failed to factor $n") +end diff --git a/test/runtests.jl b/test/runtests.jl index f63e59a..34322a4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -511,3 +511,164 @@ end end @test primes(2^31-20, 2^31-1) == [2147483629, 2147483647] end + +# --- Tests for large integer factorization (002-large-int-factorization) --- + +@testset "Polyalgorithm dispatch" begin + # Perfect power path + p = nextprime(big"100000007") + n2 = p^2 + f2 = Primes._find_factor(n2) + @test f2 == p +end + +@testset "ECM factorization" begin + # Semi-prime with a ~40-bit factor + p1 = big"824633720831" # 40-bit prime + q1 = big"1000000007" # 30-bit prime + n1 = p1 * q1 + f1 = factor(n1) + @test prodfactors(f1) == n1 + @test all(isprime(p) for (p, _) in f1) + + # p ~40 bits, q ~80 bits + p2 = big"824633720831" + q2 = big"1180591620717411303449" # ~80-bit prime + n2 = p2 * q2 + f2 = factor(n2) + @test prodfactors(f2) == n2 + @test all(isprime(p) for (p, _) in f2) +end + +@testset "MPQS factorization" begin + # Semi-prime with two ~60-bit factors + p1 = big"780002082420426809" + q1 = big"810735269523504809437013569" + n1 = p1 * q1 + f1 = factor(n1) + @test prodfactors(f1) == n1 + @test all(isprime(p) for (p, _) in f1) +end + +@testset "BigInt factorization" begin + # factor(BigInt(n)) returns Factorization{BigInt} + n1 = big"2152302898747" + f1 = factor(n1) + @test f1 isa Primes.Factorization{BigInt} + @test prodfactors(f1) == n1 + + # BigInt prime returns single-entry + p = big"359334085968622831041960188598043661065388726959079837" + f2 = factor(p) + @test length(f2) == 1 + @test first(f2) == (p => 1) +end + +@testset "Tonelli-Shanks" begin + # p ≡ 3 (mod 4) — fast path (s == 1) + @test Primes._tonelli_shanks(big"2", 7)^2 % 7 == 2 + # p ≡ 1 (mod 4) — general path + @test Primes._tonelli_shanks(big"2", 17)^2 % 17 == 2 + # n == 0 case + @test Primes._tonelli_shanks(big"0", 7) == 0 + # p == 2 case + @test Primes._tonelli_shanks(big"1", 2) == 1 + # Larger prime requiring full Tonelli-Shanks loop (p ≡ 1 mod 8, s > 1) + @test Primes._tonelli_shanks(big"2", 41)^2 % 41 == 2 +end + +@testset "MPQS parameter selection" begin + # Small number (below minimum) + fb, si = Primes._mpqs_select_params(big"10"^20) + @test fb > 0 + @test si > 0 + # Large number (above maximum) + fb2, si2 = Primes._mpqs_select_params(big"10"^80) + @test fb2 == 16000 + @test si2 == 250000 + # Mid-range (interpolation) + fb3, si3 = Primes._mpqs_select_params(big"10"^50) + @test fb3 > 0 + @test si3 > 0 +end + +@testset "Knuth multiplier selection" begin + n = big"632459103267572196107100983820469021721602147490918660274601" + k = Primes._select_knuth_multiplier(n) + @test k >= 1 + @test isodd(k) + @test k <= 47 +end + +@testset "Factor base construction" begin + kn = big"3" * big"632459103267572196107100983820469021721602147490918660274601" + fb, sqr, logp = Primes._build_factor_base(kn, 20) + @test fb[1] == 2 + @test length(fb) == 20 + @test length(sqr) == 20 + @test length(logp) == 20 + # Verify sqrt_kn_mod values: r^2 ≡ kn (mod p) for odd primes + for i in 2:length(fb) + p = fb[i] + r = sqr[i] + @test mod(r * r, p) == mod(kn, p) + end +end + +@testset "GF(2) Gaussian elimination" begin + # Empty input + deps = Primes._gf2_eliminate_gaussian(BitVector[], 3) + @test isempty(deps) + # Two identical vectors should produce a dependency + v = BitVector([true, false, true]) + deps2 = Primes._gf2_eliminate_gaussian([v, copy(v)], 3) + @test length(deps2) >= 1 + @test sort(deps2[1]) == [1, 2] + # Three vectors with one dependency: v1 ⊻ v2 = v3 + v1 = BitVector([true, false, true, false]) + v2 = BitVector([false, true, true, false]) + v3 = BitVector([true, true, false, false]) # v1 ⊻ v2 + deps3 = Primes._gf2_eliminate_gaussian([v1, v2, v3], 4) + @test length(deps3) >= 1 +end + +@testset "ECM edge cases" begin + # ECM with a small semi-prime + n = big"1000000007" * big"1000000009" + result = Primes.ecm_factor(n, 2000, 50) + @test result !== nothing + @test mod(n, result) == 0 + + # ECM returns nothing when curves are insufficient for a hard number + hard = nextprime(big"10"^30) * nextprime(big"10"^30 + 1000) + result2 = Primes.ecm_factor(hard, 100, 1) + # May or may not find a factor with just 1 curve — no assertion on result +end + +@testset "Polyalgorithm ECM schedule branches" begin + # Small number — exercises the < 128 bits branch + p1 = nextprime(big"10"^10) + q1 = nextprime(big"10"^10 + 1000) + n1 = p1 * q1 + f1 = Primes._find_factor(n1) + @test mod(n1, f1) == 0 && f1 > 1 && f1 < n1 + + # Medium number (~40 digits, >= 128 bits) — exercises the 128-192 bits branch + p2 = nextprime(big"10"^19) + q2 = nextprime(big"10"^19 + 1000) + n2 = p2 * q2 + f2 = Primes._find_factor(n2) + @test mod(n2, f2) == 0 && f2 > 1 && f2 < n2 + + # Perfect power path + @test Primes._find_factor(big"2"^64) == 2 + @test Primes._find_factor(big"7"^3) == 7 +end + +@testset "Large semi-prime factorization (#159)" begin + n = big"632459103267572196107100983820469021721602147490918660274601" + f = factor(n) + @test prodfactors(f) == n + @test all(isprime(p) for (p, _) in f) + @test length(f) == 2 # it's a semi-prime +end