Skip to content

Commit af4dc49

Browse files
NullVoxPopuliclaude
andcommitted
feat: adjust loc.start/end when stripping text node whitespace
Previously, whitespace stripping only modified TextNode.chars — the loc info pointed at positions in the original (unstripped) source, so downstream consumers that read loc got wrong line/column numbers. Add advanceStart/retractEnd helpers that walk the stripped characters, counting newlines to compute the new line/column. Use them from the central stripTextStart / stripTextEnd helpers which the explicit and standalone stripping both go through. Also: when a BlockStatement is standalone, strip the leading newline of its program body's first text (consumed by block-open) and the trailing inline whitespace of its program/inverse body's last text (consumed by block-close). This matches handlebars' standalone rule for block tags. Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent e54f2cd commit af4dc49

1 file changed

Lines changed: 123 additions & 17 deletions

File tree

packages/@glimmer/syntax/lib/parser/tokenizer-event-handlers.ts

Lines changed: 123 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,17 @@ function convertToASTv1(raw: Record<string, unknown>, source: src.Source): ASTv1
269269
// Applies `{{~` and `~}}` strip flags by trimming whitespace from neighboring
270270
// text nodes. Operates on the raw JSON AST before location conversion.
271271

272+
interface PlainLoc {
273+
start: { line: number; column: number };
274+
end: { line: number; column: number };
275+
}
276+
272277
interface Stripable {
273278
type: string;
274279
chars?: string;
280+
// plain during stripping pass, SourceSpan later — declared unknown
281+
// because both shapes flow through here.
282+
loc?: unknown;
275283
strip?: { open: boolean; close: boolean };
276284
__strip?: { open: boolean; close: boolean };
277285
openStrip?: { open: boolean; close: boolean };
@@ -325,6 +333,20 @@ function cleanupStripFlags(node: unknown): void {
325333
}
326334
}
327335

336+
function stripTextEnd(node: Stripable, pattern: RegExp): void {
337+
if (node.type !== 'TextNode' || typeof node.chars !== 'string') return;
338+
const original = node.chars;
339+
node.chars = original.replace(pattern, '');
340+
retractEnd(node, original.length - node.chars.length, original);
341+
}
342+
343+
function stripTextStart(node: Stripable, pattern: RegExp): void {
344+
if (node.type !== 'TextNode' || typeof node.chars !== 'string') return;
345+
const original = node.chars;
346+
node.chars = original.replace(pattern, '');
347+
advanceStart(node, original.length - node.chars.length, original);
348+
}
349+
328350
function stripBodyWhitespace(body: Stripable[]): void {
329351
// Pass 1: apply explicit strip flags (~) and BlockStatement inner strips.
330352
for (let i = 0; i < body.length; i++) {
@@ -335,15 +357,11 @@ function stripBodyWhitespace(body: Stripable[]): void {
335357

336358
if (leftStrip && i > 0) {
337359
const prev = body[i - 1];
338-
if (prev?.type === 'TextNode' && typeof prev.chars === 'string') {
339-
prev.chars = prev.chars.replace(/[ \t\r\n]+$/u, '');
340-
}
360+
if (prev) stripTextEnd(prev, /[ \t\r\n]+$/u);
341361
}
342362
if (rightStrip && i + 1 < body.length) {
343363
const next = body[i + 1];
344-
if (next?.type === 'TextNode' && typeof next.chars === 'string') {
345-
next.chars = next.chars.replace(/^[ \t\r\n]+/u, '');
346-
}
364+
if (next) stripTextStart(next, /^[ \t\r\n]+/u);
347365
}
348366

349367
// BlockStatement has additional inner stripping:
@@ -416,16 +434,29 @@ function applyStandaloneStripping(body: Stripable[]): void {
416434
const hasNewline = containsNewline(prev) || containsNewline(next);
417435

418436
if (prevOk && nextOk && hasNewline) {
419-
// Strip trailing inline whitespace on prev (up to but NOT including
420-
// the preceding newline — the newline itself marks where the content
421-
// on the standalone line ends, so we leave it in place). The next
422-
// text has its leading newline consumed instead.
423-
if (prev?.type === 'TextNode' && typeof prev.chars === 'string') {
424-
prev.chars = prev.chars.replace(/[ \t]+$/u, '');
425-
}
437+
// Strip trailing inline whitespace on prev (leave the preceding
438+
// newline intact so body boundary text nodes don't vanish).
439+
if (prev) stripTextEnd(prev, /[ \t]+$/u);
426440
// Strip leading whitespace + the trailing newline from next.
427-
if (next?.type === 'TextNode' && typeof next.chars === 'string') {
428-
next.chars = next.chars.replace(/^[ \t]*(?:\r\n|\r|\n)/u, '');
441+
if (next) stripTextStart(next, /^[ \t]*(?:\r\n|\r|\n)/u);
442+
443+
// If this is a standalone BlockStatement, also strip the leading
444+
// newline from its program body's first text (consumed by the block
445+
// open tag) and the trailing inline whitespace from its program or
446+
// inverse body's last text (consumed by the block close tag).
447+
if (stmt?.type === 'BlockStatement') {
448+
const program = stmt.program?.body;
449+
const inverse = stmt.inverse?.body;
450+
451+
if (program && program.length > 0) {
452+
const first = program[0];
453+
if (first) stripTextStart(first, /^[ \t]*(?:\r\n|\r|\n)/u);
454+
}
455+
const trailingBody = (inverse && inverse.length > 0 ? inverse : program) || [];
456+
if (trailingBody.length > 0) {
457+
const last = trailingBody[trailingBody.length - 1];
458+
if (last) stripTextEnd(last, /[ \t]+$/u);
459+
}
429460
}
430461
}
431462
}
@@ -461,15 +492,90 @@ function isEmptyOrWhitespaceToNewline(
461492
function stripFirstTextLeading(body: Stripable[]): void {
462493
const first = body[0];
463494
if (first?.type === 'TextNode' && typeof first.chars === 'string') {
464-
first.chars = first.chars.replace(/^[ \t\r\n]+/u, '');
495+
const original = first.chars;
496+
first.chars = original.replace(/^[ \t\r\n]+/u, '');
497+
advanceStart(first, original.length - first.chars.length, original);
465498
}
466499
}
467500

468501
function stripLastTextTrailing(body: Stripable[]): void {
469502
const last = body[body.length - 1];
470503
if (last?.type === 'TextNode' && typeof last.chars === 'string') {
471-
last.chars = last.chars.replace(/[ \t\r\n]+$/u, '');
504+
const original = last.chars;
505+
last.chars = original.replace(/[ \t\r\n]+$/u, '');
506+
retractEnd(last, original.length - last.chars.length, original);
507+
}
508+
}
509+
510+
// Move a TextNode's loc.start forward by `n` characters (across newlines).
511+
function advanceStart(node: Stripable, n: number, original: string): void {
512+
if (n <= 0) return;
513+
const loc = node.loc as PlainLoc | undefined;
514+
if (!loc || !isPlainLocObj(loc)) return;
515+
let { line, column } = loc.start;
516+
for (let i = 0; i < n; i++) {
517+
const ch = original[i];
518+
if (ch === '\n') {
519+
line++;
520+
column = 0;
521+
} else if (ch === '\r') {
522+
// treat \r and \r\n as single newline; peek next
523+
if (original[i + 1] === '\n') continue;
524+
line++;
525+
column = 0;
526+
} else {
527+
column++;
528+
}
529+
}
530+
loc.start = { line, column };
531+
}
532+
533+
// Move a TextNode's loc.end backward by `n` characters (across newlines).
534+
function retractEnd(node: Stripable, n: number, original: string): void {
535+
if (n <= 0) return;
536+
const loc = node.loc as PlainLoc | undefined;
537+
if (!loc || !isPlainLocObj(loc)) return;
538+
let { line, column } = loc.end;
539+
for (let i = 0; i < n; i++) {
540+
const ch = original[original.length - 1 - i];
541+
if (ch === '\n') {
542+
// '\r\n' treated as one — peek ahead (toward start) for '\r'
543+
if (original[original.length - 2 - i] === '\r') {
544+
i++;
545+
}
546+
line--;
547+
// Recompute column: find the length of the line we're now on by
548+
// scanning backward to previous newline.
549+
let col = 0;
550+
for (let j = original.length - 2 - i; j >= 0; j--) {
551+
if (original[j] === '\n' || original[j] === '\r') break;
552+
col++;
553+
}
554+
column = col;
555+
} else if (ch === '\r') {
556+
line--;
557+
let col = 0;
558+
for (let j = original.length - 2 - i; j >= 0; j--) {
559+
if (original[j] === '\n' || original[j] === '\r') break;
560+
col++;
561+
}
562+
column = col;
563+
} else {
564+
column = Math.max(0, column - 1);
565+
}
472566
}
567+
loc.end = { line, column };
568+
}
569+
570+
function isPlainLocObj(value: unknown): value is PlainLoc {
571+
return (
572+
value !== null &&
573+
typeof value === 'object' &&
574+
'start' in value &&
575+
'end' in value &&
576+
// Real SourceSpan has methods; plain objects don't.
577+
typeof (value as { until?: unknown }).until !== 'function'
578+
);
473579
}
474580

475581
function getOpenStrip(stmt: Stripable): boolean {

0 commit comments

Comments
 (0)