@@ -263,11 +263,13 @@ const deepMerge = (target, source) => {
263263} ;
264264
265265/**
266- * Collects URL fingerprints (spec_url and mdn_url) for each feature.
266+ * Collects URL fingerprints (spec_url and mdn_url) for each feature, and
267+ * includes features without URLs as empty entries so they're available to
268+ * the token-based fallback matcher.
267269 * @param {* } contents the merged data tree.
268- * @returns {Map<string, Set<string>> } map from feature path to its set of URL keys .
270+ * @returns {Map<string, Set<string>> } map from feature path to URL set (possibly empty) .
269271 */
270- const collectFeatureUrls = ( contents ) => {
272+ const collectFeatures = ( contents ) => {
271273 /** @type {Map<string, Set<string>> } */
272274 const features = new Map ( ) ;
273275 for ( const { path, compat } of walk ( undefined , contents ) ) {
@@ -281,13 +283,28 @@ const collectFeatureUrls = (contents) => {
281283 if ( compat . mdn_url ) {
282284 urls . add ( `mdn:${ compat . mdn_url } ` ) ;
283285 }
284- if ( urls . size ) {
285- features . set ( path , urls ) ;
286- }
286+ features . set ( path , urls ) ;
287287 }
288288 return features ;
289289} ;
290290
/**
 * Word boundaries inside a feature path's leaf segment:
 *  - runs of `_`;
 *  - a lowercase letter or digit followed by an uppercase letter
 *    (`keepAlive` -> `keep|Alive`, `Uint8Array` -> `Uint8|Array`);
 *  - the end of an acronym run followed by a capitalized word
 *    (`HTMLElement` -> `HTML|Element`).
 * Splitting before every capital would shatter acronyms into single letters,
 * which then produce spurious shared tokens between unrelated features.
 */
const LEAF_WORD_BOUNDARY = /_+|(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/;

/**
 * Tokenizes a feature path's leaf segment (the text after the last `.`) into
 * lowercase words, splitting on `_` and camelCase boundaries while keeping
 * acronym runs such as `HTML` or `XML` together as one word. Returns a Set so
 * each word counts once per feature.
 * @param {string} path the dot-separated feature path.
 * @returns {Set<string>} the distinct lowercase leaf tokens (empty for an empty leaf).
 */
const tokenizeLeaf = (path) => {
  // lastIndexOf returns -1 when there is no dot, so slice(0) keeps the whole
  // path; the result is always a string and never contains a `.`.
  const leaf = path.slice(path.lastIndexOf('.') + 1);
  return new Set(
    leaf
      .split(LEAF_WORD_BOUNDARY)
      // Leading/trailing/consecutive separators yield empty strings.
      .filter(Boolean)
      .map((word) => word.toLowerCase()),
  );
};
307+
291308/**
292309 * Reads the value at a dot-separated path within a tree.
293310 * @param {* } root the root object.
@@ -351,18 +368,20 @@ const projectMoves = (baseContents, moves) => {
351368} ;
352369
353370/**
354- * Detects features that were moved (renamed) by matching shared spec_url/mdn_url
355- * between features removed in base and features added in head. When multiple
356- * candidates share a URL, prefers the candidate with the longest shared path
357- * prefix (so `api.fetch.init_X` prefers `api.fetch.options_parameter.X` over
358- * `api.Request.Request.options_parameter.X`).
371+ * Detects features that were moved (renamed) in two passes:
372+ * 1. Match by shared spec_url/mdn_url, with longest-shared-path-prefix as
373+ * tiebreaker when multiple candidates share a URL.
374+ * 2. For features still unmatched, match by common ancestor path plus
375+ * shared non-scaffold leaf words (`keepalive`, `signal`, etc.).
376+ * Scaffold tokens — those appearing in more than half of unmatched
377+ * removed or added features (e.g. `init`, `parameter`) — are ignored.
359378 * @param {* } baseContents the merged base data tree.
360379 * @param {* } headContents the merged head data tree.
361380 * @returns {Map<string, string> } map from removed path to added path.
362381 */
363382const detectMoves = ( baseContents , headContents ) => {
364- const baseFeatures = collectFeatureUrls ( baseContents ) ;
365- const headFeatures = collectFeatureUrls ( headContents ) ;
383+ const baseFeatures = collectFeatures ( baseContents ) ;
384+ const headFeatures = collectFeatures ( headContents ) ;
366385
367386 /** @type {Map<string, string[]> } */
368387 const addedByUrl = new Map ( ) ;
@@ -379,8 +398,10 @@ const detectMoves = (baseContents, headContents) => {
379398
380399 /** @type {Map<string, string> } */
381400 const moves = new Map ( ) ;
401+ /** @type {Set<string> } */
402+ const matchedDests = new Set ( ) ;
382403 for ( const [ removedPath , urls ] of baseFeatures ) {
383- if ( headFeatures . has ( removedPath ) ) {
404+ if ( headFeatures . has ( removedPath ) || urls . size === 0 ) {
384405 continue ;
385406 }
386407 /** @type {Set<string> } */
@@ -413,6 +434,97 @@ const detectMoves = (baseContents, headContents) => {
413434 }
414435 }
415436 moves . set ( removedPath , best ) ;
437+ matchedDests . add ( best ) ;
438+ }
439+
440+ // Pass 2: token + common-ancestor matching for the rest.
441+ const unmatchedRemoved = [ ...baseFeatures . keys ( ) ] . filter (
442+ ( p ) => ! headFeatures . has ( p ) && ! moves . has ( p ) ,
443+ ) ;
444+ const unmatchedAdded = [ ...headFeatures . keys ( ) ] . filter (
445+ ( p ) => ! baseFeatures . has ( p ) && ! matchedDests . has ( p ) ,
446+ ) ;
447+ if ( unmatchedRemoved . length === 0 || unmatchedAdded . length === 0 ) {
448+ return moves ;
449+ }
450+
451+ /** @type {Map<string, Set<string>> } */
452+ const removedTokens = new Map ( ) ;
453+ /** @type {Map<string, Set<string>> } */
454+ const addedTokens = new Map ( ) ;
455+ /** @type {Map<string, number> } */
456+ const removedFreq = new Map ( ) ;
457+ /** @type {Map<string, number> } */
458+ const addedFreq = new Map ( ) ;
459+ for ( const p of unmatchedRemoved ) {
460+ const tokens = tokenizeLeaf ( p ) ;
461+ removedTokens . set ( p , tokens ) ;
462+ for ( const t of tokens ) {
463+ removedFreq . set ( t , ( removedFreq . get ( t ) ?? 0 ) + 1 ) ;
464+ }
465+ }
466+ for ( const p of unmatchedAdded ) {
467+ const tokens = tokenizeLeaf ( p ) ;
468+ addedTokens . set ( p , tokens ) ;
469+ for ( const t of tokens ) {
470+ addedFreq . set ( t , ( addedFreq . get ( t ) ?? 0 ) + 1 ) ;
471+ }
472+ }
473+ /**
474+ * @param {string } token
475+ * @returns {boolean } true if the token is too common to be distinctive.
476+ */
477+ const isScaffold = ( token ) =>
478+ ( removedFreq . get ( token ) ?? 0 ) > unmatchedRemoved . length / 2 ||
479+ ( addedFreq . get ( token ) ?? 0 ) > unmatchedAdded . length / 2 ;
480+
481+ for ( const removedPath of unmatchedRemoved ) {
482+ const rTokens = /** @type {Set<string> } */ ( removedTokens . get ( removedPath ) ) ;
483+ const rParts = removedPath . split ( '.' ) ;
484+ let best = '' ;
485+ let bestScore = - 1 ;
486+
487+ for ( const addedPath of unmatchedAdded ) {
488+ if ( matchedDests . has ( addedPath ) ) {
489+ continue ;
490+ }
491+ const aTokens = /** @type {Set<string> } */ ( addedTokens . get ( addedPath ) ) ;
492+ const aParts = addedPath . split ( '.' ) ;
493+
494+ let ancestor = 0 ;
495+ while (
496+ ancestor < rParts . length - 1 &&
497+ ancestor < aParts . length - 1 &&
498+ rParts [ ancestor ] === aParts [ ancestor ]
499+ ) {
500+ ancestor ++ ;
501+ }
502+ if ( ancestor === 0 ) {
503+ continue ;
504+ }
505+
506+ let tokenScore = 0 ;
507+ for ( const t of rTokens ) {
508+ if ( aTokens . has ( t ) && ! isScaffold ( t ) ) {
509+ const freq = ( removedFreq . get ( t ) ?? 0 ) + ( addedFreq . get ( t ) ?? 0 ) || 1 ;
510+ tokenScore += 1 / freq ;
511+ }
512+ }
513+ if ( tokenScore === 0 ) {
514+ continue ;
515+ }
516+
517+ const score = ancestor * 1000 + tokenScore ;
518+ if ( score > bestScore ) {
519+ best = addedPath ;
520+ bestScore = score ;
521+ }
522+ }
523+
524+ if ( best ) {
525+ moves . set ( removedPath , best ) ;
526+ matchedDests . add ( best ) ;
527+ }
416528 }
417529
418530 return moves ;
0 commit comments