diff --git a/edge-smoothing/vectorscale/shaders/cell-rasterizer.slang b/edge-smoothing/vectorscale/shaders/cell-rasterizer.slang index 0adefdf82f..7cdb0bb252 100644 --- a/edge-smoothing/vectorscale/shaders/cell-rasterizer.slang +++ b/edge-smoothing/vectorscale/shaders/cell-rasterizer.slang @@ -11,8 +11,11 @@ // Multi-hit resolution: top-3 nearest CPs with endpoint-defer rejection. // // Input textures: -// FinalPositions: optimized absolute positions +// PackedPositions: per-CP denormalized geometry (pp, cp, np with ghosts +// pre-applied + t_branch + neighbor indices + is_line) +// written by pack-positions.slang. // CellGraph: original positions + neighbors + flags + packed directions +// (resolve_hit still uses neighbors for color resolution). // Original: input pixel art image // // Output: viewport-sized final image @@ -43,8 +46,8 @@ void main() { layout(location = 0) in vec2 vTexCoord; layout(location = 0) out vec4 FragColor; -layout(set = 0, binding = 2) uniform sampler2D FinalPositions; // optimized CP absolute positions -layout(set = 0, binding = 3) uniform sampler2D CellGraph; // positions + neighbors + flags + directions +layout(set = 0, binding = 2) uniform sampler2D PackedPositions; // pack-positions output: (pp, cp, np, t_branch, prev_ci, next_ci, validity, is_line) per CP across 3 horizontal texels +layout(set = 0, binding = 3) uniform sampler2D CellGraph; // flags + directions (and neighbor indices, now redundant for test_one_cp) layout(set = 0, binding = 4) uniform sampler2D Original; // input pixel art image int IMG_W() { return int(params.OriginalSize.x); } @@ -79,13 +82,48 @@ vec2 read_orig(int cp_idx) { return vec2(val.r, val.g); } -// Read optimized absolute position from FinalPositions. -vec2 read_pos(int cp_idx) { - if (cp_idx < 0 || cp_idx >= NUM_CPS()) return vec2(-1e10); +// Per-CP packed render geometry from pack-positions. 
3 horizontal texels: +// col 0 = (pp.x, pp.y, prev_ci_or_-1, _) +// col 1 = (cp.x, cp.y, t_branch, validity 0=skip 1=normal 2=2cp-chain) +// col 2 = (np.x, np.y, next_ci_or_-1, _) +struct PackedCp { + vec2 pp; + vec2 cp; + vec2 np; + float t_branch; + int prev_ci; + int next_ci; + bool is_line; + bool valid; +}; + +PackedCp read_packed_cp(int cp_idx) { + PackedCp r; + r.valid = false; + r.is_line = false; + r.t_branch = 0.5; + r.prev_ci = -1; r.next_ci = -1; + r.pp = vec2(0.0); r.cp = vec2(0.0); r.np = vec2(0.0); + + if (cp_idx < 0 || cp_idx >= NUM_CPS()) return r; int cx, cy_slot; decode_cp(cp_idx, cx, cy_slot); - vec4 val = texelFetch(FinalPositions, ivec2(cx, cy_slot), 0); - return vec2(val.r, val.g); + + vec4 t1 = texelFetch(PackedPositions, ivec2(cx*3 + 1, cy_slot), 0); + if (t1.a < 0.5) return r; // pack-positions wrote skip/inactive + + vec4 t0 = texelFetch(PackedPositions, ivec2(cx*3, cy_slot), 0); + vec4 t2 = texelFetch(PackedPositions, ivec2(cx*3 + 2, cy_slot), 0); + + r.pp = t0.rg; + r.cp = t1.rg; + r.np = t2.rg; + r.t_branch = t1.b; + r.is_line = t1.a > 1.5; + r.prev_ci = (t0.b < -0.5) ? -1 : int(t0.b + 0.5); + r.next_ci = (t2.b < -0.5) ? -1 : int(t2.b + 0.5); + r.valid = true; + return r; } // Decode a neighbor CP index from component encoding. @@ -417,42 +455,16 @@ uint test_one_cp(int ci, vec2 query, inout Hit hits[3], inout int num_hits) { uint flag = read_flags(ci); if (flag == 0u) return 0u; - ivec2 nbrs = read_neighbors(ci); - int prev_ci = nbrs.x; - int next_ci = nbrs.y; - if (prev_ci < 0 && next_ci < 0) return flag; - - bool i_am_endpoint = (prev_ci < 0 || next_ci < 0); - bool two_cp_chain = false; - if (i_am_endpoint) { - int other = (prev_ci < 0) ? next_ci : prev_ci; - bool other_is_end = (read_flags(other) & IS_ENDPOINT) != 0u; - if (!other_is_end) return flag; - if (ci > other) return flag; - two_cp_chain = true; - } + // Per-CP geometry — denormalized by pack-positions. 
Replaces the + // read_neighbors + 3 read_pos chain + ghost-extension dance with 3 + // texel reads. valid=false means pack-positions wrote skip + // (inactive, isolated, or non-owner of a 2-CP chain). + PackedCp pcp = read_packed_cp(ci); + if (!pcp.valid) return flag; - vec2 cp_real = read_pos(ci); - vec2 prev_pos = (prev_ci >= 0) ? read_pos(prev_ci) : cp_real; - vec2 next_pos = (next_ci >= 0) ? read_pos(next_ci) : cp_real; - - bool prev_is_end = (prev_ci >= 0) && ((read_flags(prev_ci) & IS_ENDPOINT) != 0u); - bool next_is_end = (next_ci >= 0) && ((read_flags(next_ci) & IS_ENDPOINT) != 0u); - - vec2 cp, pp, np; - if (two_cp_chain) { - int other = (prev_ci < 0) ? next_ci : prev_ci; - vec2 other_pos = read_pos(other); - vec2 a0 = (prev_ci < 0) ? cp_real : other_pos; - vec2 a1 = (next_ci < 0) ? cp_real : other_pos; - pp = 1.5 * a0 - 0.5 * a1; - cp = 0.5 * (a0 + a1); - np = 1.5 * a1 - 0.5 * a0; - } else { - cp = cp_real; - pp = prev_is_end ? (2.0 * prev_pos - cp) : prev_pos; - np = next_is_end ? (2.0 * next_pos - cp) : next_pos; - } + vec2 pp = pcp.pp; + vec2 cp = pcp.cp; + vec2 np = pcp.np; // Quick screen: 3-sample distance² reject. 
Evaluating beval at // t=0, 1/2, 1 with the ghost-extended pp/cp/np gives the real @@ -468,7 +480,7 @@ uint test_one_cp(int ci, vec2 query, inout Hit hits[3], inout int num_hits) { if (quick_d2 > 2.0) return flag; vec2 result; - if (two_cp_chain) { + if (pcp.is_line) { vec2 a0 = 0.5 * (pp + cp); vec2 a1 = 0.5 * (cp + np); result = closest_on_segment(a0, a1, query); @@ -479,23 +491,13 @@ uint test_one_cp(int ci, vec2 query, inout Hit hits[3], inout int num_hits) { float span_d2 = result.y; if (span_d2 >= 1.0) return flag; - float t_branch; - if (two_cp_chain) { - t_branch = 0.5; - } else if (prev_is_end || next_is_end) { - vec2 interior_mid = 0.125 * pp + 0.75 * cp + 0.125 * np; - t_branch = closest_on_span(pp, cp, np, interior_mid).x; - } else { - t_branch = 0.5; - } - // Build candidate Hit and insert via explicit if/else over constant // indices — keeps hits[] register-allocated. Hit cand; cand.d2 = span_d2; cand.t = span_t; cand.cp_idx = ci; - cand.prev_ci = prev_ci; cand.next_ci = next_ci; + cand.prev_ci = pcp.prev_ci; cand.next_ci = pcp.next_ci; cand.cp_pos = cp; cand.prev_pos = pp; cand.next_pos = np; - cand.t_branch = t_branch; cand.is_line = two_cp_chain; + cand.t_branch = pcp.t_branch; cand.is_line = pcp.is_line; if (span_d2 < hits[0].d2) { hits[2] = hits[1]; diff --git a/edge-smoothing/vectorscale/shaders/pack-positions.slang b/edge-smoothing/vectorscale/shaders/pack-positions.slang new file mode 100644 index 0000000000..4e62cd29c7 --- /dev/null +++ b/edge-smoothing/vectorscale/shaders/pack-positions.slang @@ -0,0 +1,360 @@ +#version 450 +#pragma format R32G32B32A32_SFLOAT + +// Pass: Denormalize per-CP render geometry into PackedPositions. 
+//
+// Reads FinalPositions (post-tjunction stem snap) + CellGraph
+// (neighbors, flags) and packs each CP's complete render geometry
+// into 3 horizontally-adjacent texels:
+//
+//   (cx*3 + 0, cy_slot) = (pp.x, pp.y, prev_ci_as_float, 0)
+//   (cx*3 + 1, cy_slot) = (cp.x, cp.y, t_branch, validity)
+//       validity: 0.0 = skip / inactive,
+//                 1.0 = valid normal,
+//                 2.0 = valid 2-CP chain (is_line)
+//   (cx*3 + 2, cy_slot) = (np.x, np.y, next_ci_as_float, 0)
+//
+// pp / np are already ghost-extended (pp = 2·prev - cp etc.) for
+// endpoint neighbors. t_branch is computed per CP type (see the
+// t_branch dispatch in main()):
+//   - IS_CROSSING: 2D Newton iteration on F(t,s) = B_a(t) - B_b(s) = 0,
+//     starting from (0.5, 0.5); 0.5 if any of the four neighbors is missing.
+//   - 2-CP chain (degenerate stem): t_branch = 0.5; pp/cp/np built as
+//     a straight line so the rasterizer's is_line path takes over.
+//   - One-sided clamped Bezier: closed-form cubic projection of the
+//     interior B-spline midpoint onto the clamped span.
+//   - Else: t_branch = 0.5.
+//
+// Cost: ~once-per-frame O(num_cps) work — 3 fragments per CP slot, so
+// 6·corners_w·corners_h fragments total. Drops per-pixel work in the
+// rasterizer's test_one_cp from ~6 fetches + ghost construction +
+// cubic/Newton solve down to 4 fetches (1 flag + 3 packed reads).
+//
+// resolve_hit still needs neighbor flags/dirs for color resolution, so
+// the neighbor indices stay encoded in the B channels of texels 0/2.
+
+layout(push_constant) uniform Push {
+    vec4 SourceSize;
+    vec4 OriginalSize;
+    vec4 OutputSize;
+    uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO {
+    mat4 MVP;
+    vec4 CellGraphSize;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main() {
+    vTexCoord = TexCoord;                    // forwarded unchanged to the fragment stage
+    gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+
+layout(set = 0, binding = 2) uniform sampler2D FinalPositions; // post-tjunction stem-snapped positions
+layout(set = 0, binding = 3) uniform sampler2D CellGraph;      // neighbors + flags
+
+int IMG_W() { return int(params.OriginalSize.x); }   // input image width in pixels
+int IMG_H() { return int(params.OriginalSize.y); }   // input image height in pixels
+int CORNERS_W() { return IMG_W() + 1; }              // one more corner than pixels per axis
+int CORNERS_H() { return IMG_H() + 1; }
+int NUM_CPS() { return CORNERS_W() * CORNERS_H() * 2; }   // two CP slots per corner
+
+const uint IS_CROSSING = 64u;
+const uint IS_ENDPOINT = 128u;
+
+void decode_cp(int cp_idx, out int cx, out int cy_slot) {
+    // cp_idx = (cy * CORNERS_W + cx) * 2 + slot; the texel row is cy * 2 + slot.
+    int row_len = CORNERS_W();
+    int corner  = cp_idx / 2;
+    cx      = corner % row_len;
+    cy_slot = (corner / row_len) * 2 + (cp_idx & 1);
+}
+
+vec2 read_pos(int cp_idx) {
+    bool in_range = (cp_idx >= 0) && (cp_idx < NUM_CPS());
+    if (!in_range) return vec2(0.0);         // out-of-range indices read as the origin
+    int col, row; decode_cp(cp_idx, col, row);
+    return texelFetch(FinalPositions, ivec2(col, row), 0).xy;
+}
+
+// ---- B-spline curve-curve intersection (2D Newton) ----
+//
+// Solves F(t, s) = B_a(t) - B_b(s) = 0 from (t, s) = (0.5, 0.5). The
+// optimizer keeps crossings near the grid corner so the initial guess
+// is within ~0.1 of the answer; quadratic Newton convergence drives
+// the residual below f32 epsilon in 3 iterations (4 for safety).
+// Returns vec2(t_a, t_b).
+//
+// Each step: J·Δ = -F where J is the 2×2 partial-derivative matrix
+//   J11 = ∂Fx/∂t =  2·aa.x·t + ba.x     J12 = ∂Fx/∂s = -(2·ab.x·s + bb.x)
+//   J21 = ∂Fy/∂t =  2·aa.y·t + ba.y     J22 = ∂Fy/∂s = -(2·ab.y·s + bb.y)
+// Inverted analytically. The early-break guard on |det(J)| < 1e-12
+// is the tangent / parallel-curves case; in practice the pipeline
+// guarantees a real crossing so this never fires, but it keeps the
+// shader well-defined if a degenerate input ever shows up.
+vec2 bspline_intersect(vec2 a_p0, vec2 a_p1, vec2 a_p2,     // span A control points (parameter t)
+                       vec2 b_p0, vec2 b_p1, vec2 b_p2) {   // span B control points (parameter s)
+    vec2 aa = 0.5 * a_p0 - a_p1 + 0.5 * a_p2;   // B_a(t) = aa·t² + ba·t + ca
+    vec2 ba = -a_p0 + a_p1;
+    vec2 ca = 0.5 * a_p0 + 0.5 * a_p1;
+    vec2 ab = 0.5 * b_p0 - b_p1 + 0.5 * b_p2;   // B_b(s) = ab·s² + bb·s + cb
+    vec2 bb = -b_p0 + b_p1;
+    vec2 cb = 0.5 * b_p0 + 0.5 * b_p1;
+
+    float t = 0.5;
+    float s = 0.5;
+    for (int iter = 0; iter < 4; iter++) {
+        vec2 fa = aa * t * t + ba * t + ca;
+        vec2 fb = ab * s * s + bb * s + cb;
+        vec2 f = fa - fb;                        // residual F(t, s)
+        vec2 dft = 2.0 * aa * t + ba;            // ∂F/∂t
+        vec2 dfs = -(2.0 * ab * s + bb);         // ∂F/∂s
+        float det = dft.x * dfs.y - dft.y * dfs.x;
+        if (abs(det) < 1e-12) break;             // singular Jacobian: keep the current estimate
+        float dt = ( dfs.y * f.x - dfs.x * f.y) / det;   // (dt, ds) = J⁻¹·F, subtracted below
+        float ds = (-dft.y * f.x + dft.x * f.y) / det;
+        t -= dt;
+        s -= ds;
+    }
+    return vec2(clamp(t, 0.0, 1.0), clamp(s, 0.0, 1.0));   // both parameters clamped to [0, 1]
+}
+
+int decode_neighbor(vec4 val) {                  // CellGraph neighbor texel -> linear CP index
+    if (val.r < -0.5) return -1;                 // negative R encodes "no neighbor"
+    int ncx = int(val.r + 0.5);                  // +0.5 rounds the float-encoded integers
+    int ncy = int(val.g + 0.5);
+    int nslot = int(val.b + 0.5);
+    return (ncy * CORNERS_W() + ncx) * 2 + nslot;
+}
+
+uint read_flags(int cp_idx) {                    // flag bits of a CP; 0u for out-of-range indices
+    if (cp_idx < 0 || cp_idx >= NUM_CPS()) return 0u;
+    int cx, cy_slot;
+    decode_cp(cp_idx, cx, cy_slot);
+    int base_row = cy_slot * 3;                  // each CP slot owns 3 CellGraph rows; flags sit in row 0, channel B
+    return uint(texelFetch(CellGraph, ivec2(cx, base_row), 0).b + 0.5);
+}
+
+// Closest-point cubic solver — must match cell-rasterizer.slang's
+// closest_on_span exactly so that t_branch values agree with what the
+// rasterizer would compute. Inlined here because pack-positions runs once
+// per frame (cheap) while the rasterizer runs per pixel.
+float span_closest_t(vec2 p0, vec2 p1, vec2 p2, vec2 pt) {   // returns t in [0, 1] minimising |B(t) - pt|
+    vec2 a = 0.5 * (p0 - 2.0 * p1 + p2);         // D(t) = a·t² + b·t + e is the offset B(t) - pt
+    vec2 b = p1 - p0;
+    vec2 e = 0.5 * (p0 + p1) - pt;
+
+    float c3 = 2.0 * dot(a, a);                  // D(t)·D'(t) = c3·t³ + c2·t² + c1·t + c0
+    float c2 = 3.0 * dot(a, b);
+    float c1 = 2.0 * dot(a, e) + dot(b, b);
+    float c0 = dot(b, e);
+
+    #define EVAL_D2(t) dot((a*(t)+b)*(t)+e, (a*(t)+b)*(t)+e)
+    float d0 = EVAL_D2(0.0);                     // span endpoints seed the best candidate
+    float d1 = EVAL_D2(1.0);
+    float best_d2 = d0; float best_t = 0.0;
+    if (d1 < best_d2) { best_d2 = d1; best_t = 1.0; }
+
+    if (abs(c3) < 1e-12) {                       // cubic term vanishes: quadratic or linear equation
+        if (abs(c2) > 1e-12) {
+            float disc = c1*c1 - 4.0*c2*c0;
+            if (disc >= 0.0) {
+                float sq = sqrt(disc);
+                float qq = -0.5 * (c1 + sign(c1) * sq);   // NOTE(review): sign(0) == 0, so c1 == 0 gives qq == 0 and both roots are dropped — confirm this matches closest_on_span
+                float t1 = qq / c2;
+                float t2 = c0 / qq;
+                if (t1 > 0.0 && t1 < 1.0) { float dd = EVAL_D2(t1); if (dd < best_d2) { best_d2 = dd; best_t = t1; } }
+                if (t2 > 0.0 && t2 < 1.0) { float dd = EVAL_D2(t2); if (dd < best_d2) { best_d2 = dd; best_t = t2; } }
+            }
+        } else if (abs(c1) > 1e-12) {
+            float t1 = -c0 / c1;
+            if (t1 > 0.0 && t1 < 1.0) { float dd = EVAL_D2(t1); if (dd < best_d2) { best_d2 = dd; best_t = t1; } }
+        }
+    } else {
+        float inv3a = 1.0 / (3.0 * c3);
+        float shift = -c2 * inv3a;               // t = y + shift removes the quadratic term
+        float p = (3.0*c3*c1 - c2*c2) / (3.0*c3*c3);
+        float q = (2.0*c2*c2*c2 - 9.0*c3*c2*c1 + 27.0*c3*c3*c0) / (27.0*c3*c3*c3);
+        float disc = q*q/4.0 + p*p*p/27.0;
+
+        if (disc > 1e-12) {                      // one real root: signed cube roots
+            float sq = sqrt(disc);
+            float hq = -q * 0.5;
+            float u1 = sign(hq+sq) * pow(abs(hq+sq), 1.0/3.0);
+            float u2 = sign(hq-sq) * pow(abs(hq-sq), 1.0/3.0);
+            float t1 = u1 + u2 + shift;
+            if (t1 > 0.0 && t1 < 1.0) { float dd = EVAL_D2(t1); if (dd < best_d2) { best_d2 = dd; best_t = t1; } }
+        } else {                                 // up to three real roots: trigonometric form
+            float r = sqrt(max(-p*p*p/27.0, 0.0));
+            float phi = (r < 1e-15) ? 0.0 : acos(clamp(-q/(2.0*r), -1.0, 1.0));
+            float cube_r = 2.0 * sign(r) * pow(abs(r), 1.0/3.0);
+            const float TAU = 6.283185307;
+            for (int k = 0; k < 3; k++) {
+                float angle = (phi + TAU * float(k)) / 3.0;
+                float t1 = cube_r * cos(angle) + shift;
+                if (t1 > 0.0 && t1 < 1.0) { float dd = EVAL_D2(t1); if (dd < best_d2) { best_d2 = dd; best_t = t1; } }
+            }
+        }
+    }
+    #undef EVAL_D2
+    return best_t;
+}
+
+void main() {
+    ivec2 opos = ivec2(vTexCoord * params.OutputSize.xy);
+
+    // Decode (cx, which_pos, cy_slot) from the output coordinate.
+    // Each CP occupies 3 adjacent columns, so column = cx*3 + which_pos.
+    int which_pos = opos.x % 3;   // 0=pp, 1=cp, 2=np
+    int cx = opos.x / 3;
+    int cy_slot = opos.y;
+
+    if (cx >= CORNERS_W() || cy_slot >= CORNERS_H() * 2) {   // texel outside the CP grid
+        FragColor = vec4(0.0);
+        return;
+    }
+
+    int cy = cy_slot / 2;
+    int slot = cy_slot & 1;
+    int ci = (cy * CORNERS_W() + cx) * 2 + slot;
+
+    // Read flags + neighbors. Inactive CPs write zeros — the rasterizer
+    // already skips them via its own flag check.
+    uint flag = read_flags(ci);
+    if (flag == 0u) {
+        FragColor = vec4(0.0);
+        return;
+    }
+
+    int base_row = cy_slot * 3;   // rows base_row+1 / base_row+2 hold the prev / next neighbor
+    int prev_ci = decode_neighbor(texelFetch(CellGraph, ivec2(cx, base_row + 1), 0));
+    int next_ci = decode_neighbor(texelFetch(CellGraph, ivec2(cx, base_row + 2), 0));
+
+    // CPs with no neighbors (e.g. fully-isolated chain endpoints owned by
+    // someone else's clamped extension) — write zeros.
+    if (prev_ci < 0 && next_ci < 0) {
+        FragColor = vec4(0.0);
+        return;
+    }
+
+    // 2-CP-chain detection: both ends are endpoint markers and we own the
+    // span. Only the lower-indexed endpoint renders. Mirror the
+    // rasterizer's existing logic so the packed geometry matches what
+    // test_one_cp would have constructed.
+    bool i_am_endpoint = (prev_ci < 0 || next_ci < 0);
+    bool two_cp_chain = false;
+    if (i_am_endpoint) {
+        int other = (prev_ci < 0) ? next_ci : prev_ci;
+        bool other_is_end = (read_flags(other) & IS_ENDPOINT) != 0u;
+        if (!other_is_end || ci > other) {   // endpoint of a longer chain, or the non-owning end of a 2-CP chain
+            FragColor = vec4(0.0);
+            return;
+        }
+        two_cp_chain = true;
+    }
+
+    vec2 cp_real = read_pos(ci);
+    vec2 prev_pos = (prev_ci >= 0) ? read_pos(prev_ci) : cp_real;
+    vec2 next_pos = (next_ci >= 0) ? read_pos(next_ci) : cp_real;
+
+    bool prev_is_end = (prev_ci >= 0) && ((read_flags(prev_ci) & IS_ENDPOINT) != 0u);
+    bool next_is_end = (next_ci >= 0) && ((read_flags(next_ci) & IS_ENDPOINT) != 0u);
+
+    vec2 cp, pp, np;
+    if (two_cp_chain) {
+        // Render as a straight line between the two markers. Pick
+        // p0=(3·a0-a1)/2, p1=midpoint, p2=(3·a1-a0)/2 so beval gives
+        // B(t)=lerp(a0,a1,t). Closest-point queries dispatch to a line
+        // solver via is_line in the rasterizer.
+        int other = (prev_ci < 0) ? next_ci : prev_ci;
+        vec2 other_pos = read_pos(other);
+        vec2 a0 = (prev_ci < 0) ? cp_real : other_pos;   // a0 = the end without a prev, a1 = the end without a next
+        vec2 a1 = (next_ci < 0) ? cp_real : other_pos;
+        pp = 1.5 * a0 - 0.5 * a1;
+        cp = 0.5 * (a0 + a1);
+        np = 1.5 * a1 - 0.5 * a0;
+    } else {
+        cp = cp_real;
+        pp = prev_is_end ? (2.0 * prev_pos - cp) : prev_pos;   // ghost: reflect cp through an endpoint neighbor
+        np = next_is_end ? (2.0 * next_pos - cp) : next_pos;
+    }
+
+    // t_branch: rasterizer's branch threshold between prev_dir and
+    // next_dir for color resolution. Mirrors test_one_cp's logic.
+    //
+    // Crossings: solve the curve-curve intersection inline (Newton
+    // iteration on F(t,s) = B_a(t) - B_b(s) = 0). Need both this slot's
+    // own neighbors and the partner slot's neighbors — slot 0 holds
+    // the N-S chain, slot 1 holds E-W. cp_a and cp_b are the same
+    // physical position (both slots of a crossing are co-located).
+    float t_branch;
+    if ((flag & IS_CROSSING) != 0u) {
+        // Read partner-slot neighbor indices (partner is at cy_slot ^ 1
+        // within the same cx column).
+        int p_base_row = (cy_slot ^ 1) * 3;
+        int p_prev = decode_neighbor(texelFetch(CellGraph, ivec2(cx, p_base_row + 1), 0));
+        int p_next = decode_neighbor(texelFetch(CellGraph, ivec2(cx, p_base_row + 2), 0));
+        vec2 partner_cp = read_pos(ci ^ 1);   // partner slot's own control point (ci ^ 1 flips the slot bit)
+        int n_idx, s_idx, e_idx, w_idx;
+        vec2 ns_cp, ew_cp;                    // centre control point of the N-S / E-W chain
+        if (slot == 0) {
+            n_idx = prev_ci; s_idx = next_ci; ns_cp = cp_real;      // own (slot 0) = N-S
+            e_idx = p_prev;  w_idx = p_next;  ew_cp = partner_cp;   // partner (slot 1) = E-W
+        } else {
+            n_idx = p_prev;  s_idx = p_next;  ns_cp = partner_cp;   // partner (slot 0) = N-S
+            e_idx = prev_ci; w_idx = next_ci; ew_cp = cp_real;      // own (slot 1) = E-W
+        }
+
+        if (n_idx >= 0 && s_idx >= 0 && e_idx >= 0 && w_idx >= 0) {
+            vec2 n_pos = read_pos(n_idx);
+            vec2 s_pos = read_pos(s_idx);
+            vec2 e_pos = read_pos(e_idx);
+            vec2 w_pos = read_pos(w_idx);
+            bool n_is_end = (read_flags(n_idx) & IS_ENDPOINT) != 0u;
+            bool s_is_end = (read_flags(s_idx) & IS_ENDPOINT) != 0u;
+            bool e_is_end = (read_flags(e_idx) & IS_ENDPOINT) != 0u;
+            bool w_is_end = (read_flags(w_idx) & IS_ENDPOINT) != 0u;
+            // Ghost-extend each endpoint neighbor about its own chain's control point, so both spans match what each slot packs for itself.
+            vec2 n_in = n_is_end ? (2.0 * n_pos - ns_cp) : n_pos;
+            vec2 s_in = s_is_end ? (2.0 * s_pos - ns_cp) : s_pos;
+            vec2 e_in = e_is_end ? (2.0 * e_pos - ew_cp) : e_pos;
+            vec2 w_in = w_is_end ? (2.0 * w_pos - ew_cp) : w_pos;
+
+            vec2 t_pair = bspline_intersect(n_in, ns_cp, s_in,
+                                            e_in, ew_cp, w_in);
+            t_branch = (slot == 0) ? t_pair.x : t_pair.y;   // .x = N-S parameter, .y = E-W parameter
+        } else {
+            t_branch = 0.5;   // incomplete crossing neighborhood: symmetric default
+        }
+    } else if (two_cp_chain) {
+        t_branch = 0.5;
+    } else if (prev_is_end || next_is_end) {
+        // One-sided clamped Bezier: parameterization is shifted, so the
+        // t at which the rendered curve reaches the symmetric
+        // "before/after sc" location of an interior span isn't 0.5.
+        // Project the interior-span midpoint onto the clamped span.
+        vec2 interior_mid = 0.125 * pp + 0.75 * cp + 0.125 * np;
+        t_branch = span_closest_t(pp, cp, np, interior_mid);
+    } else {
+        t_branch = 0.5;
+    }
+
+    float validity = two_cp_chain ? 2.0 : 1.0;                 // matches the rasterizer's is_line test (validity > 1.5)
+    float prev_ci_f = (prev_ci < 0) ? -1.0 : float(prev_ci);   // -1.0 encodes "no neighbor"
+    float next_ci_f = (next_ci < 0) ? 
-1.0 : float(next_ci); + + vec4 out_texel; + if (which_pos == 0) out_texel = vec4(pp.x, pp.y, prev_ci_f, 0.0); + else if (which_pos == 1) out_texel = vec4(cp.x, cp.y, t_branch, validity); + else out_texel = vec4(np.x, np.y, next_ci_f, 0.0); + FragColor = out_texel; +} diff --git a/edge-smoothing/vectorscale/shaders/update-tjunction.slang b/edge-smoothing/vectorscale/shaders/update-tjunction.slang index 9b7811d5dc..493ab584e4 100644 --- a/edge-smoothing/vectorscale/shaders/update-tjunction.slang +++ b/edge-smoothing/vectorscale/shaders/update-tjunction.slang @@ -1,24 +1,23 @@ #version 450 #pragma format R32G32B32A32_SFLOAT -// Pass: Post-optimization junction position correction. +// Pass: T-junction stem CP snap. // -// Crossings: ghost-aware inverse B-spline correction (each slot -// independently). Coefficients depend on whether prev/next is a -// chain endpoint, which causes the rasterizer to ghost-adjust -// that side of the clamped Bezier. -// T-junctions: through-CP is left at the optimizer's value (not -// overwritten). Stem CPs snap onto the rendered through-curve -// via the ghost-aware algebraic B(0.5) formula. +// Through-CPs (IS_TJUNCTION) and crossings (IS_CROSSING) pass through +// at their optimizer-final positions. Stem CPs (the slot-1 partner of +// an IS_TJUNCTION through-CP) snap onto the rendered through-curve via +// the ghost-aware algebraic B(0.5) formula. Coefficients depend on +// whether the through-CP's prev/next is a chain endpoint. // -// Uses Opt2 (original optimizer output) for the crossing-correction -// grid target, Source (previous pass) for neighbor positions. Multi- -// pass iteration: dispatch a few times so junction CPs whose neighbors -// are themselves junctions converge. +// Crossing curve-curve intersection is computed in pack-positions; this +// pass leaves the crossing CP positions alone. 
+// +// Multi-pass iteration: dispatched 3× so stem CPs whose chain +// neighbors are themselves stems converge (Jacobi iteration on the +// stem-position system, contraction ≈ 0.17/pass). // // Input textures: // Source (previous pass): current positions (neighbors) -// Opt2: original optimizer output (center weight) // CellGraph: neighbors + flags // // Output: (R=pos.x, G=pos.y, B=0, A=0) @@ -33,7 +32,6 @@ layout(push_constant) uniform Push { layout(std140, set = 0, binding = 0) uniform UBO { mat4 MVP; vec4 CellGraphSize; - vec4 Opt2Size; } global; #pragma stage vertex @@ -52,7 +50,6 @@ layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Source; // current positions (neighbors) layout(set = 0, binding = 3) uniform sampler2D CellGraph; // pass 3 (neighbors + flags) -layout(set = 0, binding = 4) uniform sampler2D Opt2; // original optimizer output int IMG_W() { return int(params.OriginalSize.x); } int IMG_H() { return int(params.OriginalSize.y); } @@ -61,7 +58,6 @@ int CORNERS_H() { return IMG_H() + 1; } int NUM_CPS() { return CORNERS_W() * CORNERS_H() * 2; } const uint IS_TJUNCTION = 32u; -const uint IS_CROSSING = 64u; const uint IS_ENDPOINT = 128u; void decode_cp(int cp_idx, out int cx, out int cy_slot) { @@ -80,14 +76,6 @@ vec2 read_pos(int cp_idx) { return vec2(val.r, val.g); } -vec2 read_orig_pos(int cp_idx) { - if (cp_idx < 0 || cp_idx >= NUM_CPS()) return vec2(0.0); - int cx, cy_slot; - decode_cp(cp_idx, cx, cy_slot); - vec4 val = texelFetch(Opt2, ivec2(cx, cy_slot), 0); - return vec2(val.r, val.g); -} - int decode_neighbor(vec4 val) { if (val.r < -0.5) return -1; int ncx = int(val.r + 0.5); @@ -131,43 +119,14 @@ void main() { uint flags = read_flags(i); vec2 curr_pos = read_pos(i); - vec2 orig_pos = read_orig_pos(i); int partner = i ^ 1; uint partner_flags = read_flags(partner); - // Valence-4 crossing: ghost-aware inverse B-spline correction. - // Each slot processed independently (different pp/np). 
The - // coefficients (a, bp, bn) solve B_rendered(0.5) = grid for cp, - // accounting for the rasterizer's ghost extension of any endpoint - // neighbor on this slot's chain. - if ((flags & IS_CROSSING) != 0u) { - ivec2 nbrs = read_neighbors(i); - int prev_idx = nbrs.x; - int next_idx = nbrs.y; - - if (prev_idx >= 0 && next_idx >= 0) { - vec2 prev_pos = read_pos(prev_idx); - vec2 next_pos = read_pos(next_idx); - bool prev_is_end = (read_flags(prev_idx) & IS_ENDPOINT) != 0u; - bool next_is_end = (read_flags(next_idx) & IS_ENDPOINT) != 0u; - float a, bp, bn; - if (!prev_is_end && !next_is_end) { a = 0.75; bp = 0.125; bn = 0.125; } - else if (prev_is_end && !next_is_end) { a = 0.625; bp = 0.25; bn = 0.125; } - else if (!prev_is_end && next_is_end) { a = 0.625; bp = 0.125; bn = 0.25; } - else { a = 0.5; bp = 0.25; bn = 0.25; } - vec2 corrected = (orig_pos - bp * prev_pos - bn * next_pos) / a; - FragColor = vec4(corrected.x, corrected.y, 0.0, 0.0); - return; - } - } else if ((flags & IS_TJUNCTION) != 0u) { - // T-junction through-CP: leave at the optimizer's position. - FragColor = vec4(curr_pos.x, curr_pos.y, 0.0, 0.0); - return; - } else if ((partner_flags & IS_TJUNCTION) != 0u && (flags & ~IS_ENDPOINT) == 1u) { + if ((partner_flags & IS_TJUNCTION) != 0u && (flags & ~IS_ENDPOINT) == 1u) { // Stem CPs carry flags = IS_USED | IS_ENDPOINT (= 129); mask // IS_ENDPOINT before testing that this slot is a plain stem. - // Stem: snap onto the rendered through-curve via ghost-aware B(0.5). + // Snap onto the rendered through-curve via ghost-aware B(0.5). ivec2 partner_nbrs = read_neighbors(partner); int prev_idx = partner_nbrs.x; int next_idx = partner_nbrs.y; @@ -189,6 +148,9 @@ void main() { } } - // Default: pass through position unchanged + // Default: pass through position unchanged. 
Crossings and T-junction + // through-CPs both end up here — they keep the optimizer's position; + // the crossing's curve-curve intersection parameter is computed + // downstream in pack-positions. FragColor = vec4(curr_pos.x, curr_pos.y, 0.0, 0.0); } diff --git a/edge-smoothing/vectorscale/vectorscale.slangp b/edge-smoothing/vectorscale/vectorscale.slangp index cac088e971..d0bf772ac7 100644 --- a/edge-smoothing/vectorscale/vectorscale.slangp +++ b/edge-smoothing/vectorscale/vectorscale.slangp @@ -1,15 +1,16 @@ # Vibeboy vectorize shader preset # Works with any input resolution (not just Game Boy 160x144). # Intermediate textures are over-allocated using source-relative scales: -# Cell graph: 2W x 7H (needs (W+1) x 6(H+1), works for H >= 6) -# Positions: 2W x 3.5H (needs (W+1) x 2(H+1), works for H >= 2) +# Cell graph: 2W x 7H (needs (W+1) x 6(H+1), works for H >= 6) +# Positions: 2W x 3.5H (needs (W+1) x 2(H+1), works for H >= 2) +# PackedPositions: 6W x 3.5H (3 horizontally-adjacent texels per CP slot) # # Framebuffer formats are set per-shader via `#pragma format` (FP32 for # position-storing passes). DO NOT add `float_framebuffer = true` here # — it forces RGBA16F on some backends and silently overrides the pragma, # causing sub-pixel position rounding visible as hairline misalignment # at the rasterizer. -shaders = 10 +shaders = 11 shader0 = shaders/similarity-graph.slang alias0 = SimilarityGraph @@ -57,9 +58,10 @@ scale_type5 = source scale5 = 1.0 wrap_mode5 = clamp_to_border -# T-junction/crossing correction: 3 iterations for convergence. -# Each pass reads neighbor positions from Source (previous pass) and -# original center positions from Opt2. +# T-junction stem CP snap (3 iterations for convergence). Only stem CPs +# are repositioned here — through-CPs and crossings pass through with +# their optimizer-final positions intact. The crossing curve-curve +# intersection is handled in pack-positions below. 
shader6 = shaders/update-tjunction.slang alias6 = TJunc1 filter_linear6 = false @@ -81,9 +83,25 @@ scale_type8 = source scale8 = 1.0 wrap_mode8 = clamp_to_border -shader9 = shaders/cell-rasterizer.slang -alias9 = FinalOutput -filter_linear9 = true -scale_type9 = viewport -scale9 = 1.0 +# Denormalize per-CP geometry: pack each CP's (pp, cp, np) ghost-extended +# triple, t_branch, neighbor indices, validity, and is_line flag into 3 +# horizontally-adjacent texels of PackedPositions. For IS_CROSSING CPs, +# t_branch is the curve-curve intersection parameter (Newton iteration on +# the two B-spline spans, inlined). Lets the rasterizer skip +# neighbor-index decode + neighbor position fetches + ghost construction +# + cubic/Newton solves in its hot loop. +shader9 = shaders/pack-positions.slang +alias9 = PackedPositions +filter_linear9 = false +scale_type_x9 = source +scale_x9 = 3.0 +scale_type_y9 = source +scale_y9 = 1.0 wrap_mode9 = clamp_to_border + +shader10 = shaders/cell-rasterizer.slang +alias10 = FinalOutput +filter_linear10 = true +scale_type10 = viewport +scale10 = 1.0 +wrap_mode10 = clamp_to_border