66
77import json
88from collections import defaultdict
9- from collections .abc import Iterator
9+ from collections .abc import Callable , Hashable , Iterator
1010from difflib import SequenceMatcher
1111from pathlib import Path
1212from typing import NamedTuple
@@ -34,26 +34,35 @@ class PackageDiff(NamedTuple):
3434 changed : list [ChangedMessage ] # same message, altered line / text
3535
3636
37- def _pair_by_position (
38- old_residuals : list [JSONMessage ], new_residuals : list [JSONMessage ]
37+ SYMBOL_RENAME_SIMILARITY_THRESHOLD = 0.5
38+
39+
40+ def _pair_by_key (
41+ old_residuals : list [JSONMessage ],
42+ new_residuals : list [JSONMessage ],
43+ key : Callable [[JSONMessage ], tuple [Hashable , ...]],
44+ accept : Callable [[JSONMessage , JSONMessage ], bool ] = lambda _o , _n : True ,
3945) -> tuple [list [ChangedMessage ], list [JSONMessage ], list [JSONMessage ]]:
40- """Pair residual messages by ``(symbol, path, obj)`` in input order.
46+ """Pair messages whose ``key`` matches, in input order.
4147
42- Two messages sharing that key are the "same warning" — if they differ only
48+ Two messages sharing the key are the "same warning" — if they differ only
4349 in line numbers or message text, they should be reported as *changed*
4450 rather than as a separate removal + addition. Leftovers on each side are
45- genuinely missing or genuinely new.
51+ passed through to the next pass (or reported as missing/new).
52+
53+ ``accept`` is an optional predicate that can veto a candidate pair (used to
54+ avoid pairing two unrelated messages that happen to share the key).
4655 """
47- new_by_key : dict [tuple [str , str , str ], list [JSONMessage ]] = defaultdict (list )
56+ new_by_key : dict [tuple [Hashable , ... ], list [JSONMessage ]] = defaultdict (list )
4857 for m in new_residuals :
49- new_by_key [m [ "symbol" ], m [ "path" ], m [ "obj" ] ].append (m )
58+ new_by_key [key ( m ) ].append (m )
5059
5160 paired : list [ChangedMessage ] = []
5261 final_missing : list [JSONMessage ] = []
5362 matched_new : set [int ] = set ()
5463 for old in old_residuals :
55- bucket = new_by_key .get ((old [ "symbol" ], old [ "path" ], old [ "obj" ] ))
56- if bucket :
64+ bucket = new_by_key .get (key (old ))
65+ if bucket and accept ( old , bucket [ 0 ]) :
5766 new = bucket .pop (0 )
5867 paired .append (ChangedMessage (old = old , new = new ))
5968 matched_new .add (id (new ))
@@ -63,6 +72,48 @@ def _pair_by_position(
6372 return paired , final_missing , final_new
6473
6574
def _is_symbol_rename(old: JSONMessage, new: JSONMessage) -> bool:
    """Tell whether ``old`` and ``new`` plausibly carry a renamed symbol.

    The ``"symbol"`` values of both messages are compared with
    :class:`difflib.SequenceMatcher`; the pair counts as a rename when the
    resulting ratio reaches ``SYMBOL_RENAME_SIMILARITY_THRESHOLD``.

    This guards against pairing unrelated messages that merely share a source
    position (e.g. ``useless-suppression`` removed and ``locally-disabled``
    added on the same line — conceptually opposite messages, not a rename).
    """
    matcher = SequenceMatcher(None, old["symbol"], new["symbol"])
    similarity = matcher.ratio()
    return SYMBOL_RENAME_SIMILARITY_THRESHOLD <= similarity
84+
85+
def _pair_residuals(
    old_residuals: list[JSONMessage], new_residuals: list[JSONMessage]
) -> tuple[list[ChangedMessage], list[JSONMessage], list[JSONMessage]]:
    """Pair residual messages in two passes, the second looser than the first.

    Pass 1 keys on ``(symbol, path, obj)``: the same warning whose line or
    message text changed.

    Pass 2 runs only on what pass 1 left unpaired and keys on the exact source
    location (path, line, column, and the optional end line / end column). A
    candidate pair is kept only if ``_is_symbol_rename`` accepts it, which
    targets renames such as ``used-before-assignment`` →
    ``possibly-used-before-assignment`` emitted at the same code position.

    Returns the pairs from both passes (pass-1 pairs first), followed by the
    old and new messages that neither pass could pair.
    """

    def warning_identity(msg: JSONMessage) -> tuple[Hashable, ...]:
        # Same symbol on the same object in the same file.
        return (msg["symbol"], msg["path"], msg["obj"])

    def source_location(msg: JSONMessage) -> tuple[Hashable, ...]:
        # End positions are read with .get(), so a missing key yields None
        # instead of raising.
        return (
            msg["path"],
            msg["line"],
            msg["column"],
            msg.get("endLine"),
            msg.get("endColumn"),
        )

    same_warning_pairs, unpaired_old, unpaired_new = _pair_by_key(
        old_residuals,
        new_residuals,
        key=warning_identity,
    )
    rename_pairs, unpaired_old, unpaired_new = _pair_by_key(
        unpaired_old,
        unpaired_new,
        key=source_location,
        accept=_is_symbol_rename,
    )
    all_pairs = same_warning_pairs + rename_pairs
    return all_pairs, unpaired_old, unpaired_new
115+
116+
66117def format_span (msg : JSONMessage ) -> str :
67118 """Format a message's location as ``line:col to endLine:endCol``."""
68119 start = f"{ msg ['line' ]} :{ msg ['column' ]} "
@@ -176,9 +227,10 @@ def __iter__(self) -> Iterator[PackageDiff]:
176227 except ValueError :
177228 residual_old .append (message )
178229
179- # Second pass: pair residuals by position to detect *changed*
180- # messages (same warning, different line or text).
181- paired , final_missing , final_new = _pair_by_position (
230+ # Second pass: pair residuals by symbol then by location to
231+ # detect *changed* messages (same warning, different line/text,
232+ # or a symbol rename at the same source position).
233+ paired , final_missing , final_new = _pair_residuals (
182234 residual_old , pr_messages
183235 )
184236
0 commit comments