Skip to content

Commit 6e7d3c9

Browse files
[primer] Pair symbol-renamed messages at the same location
Adds a second pairing pass keyed on exact source location, gated on ``SequenceMatcher.ratio() >= 0.5`` between the old and new symbols. This catches renames like ``used-before-assignment`` → ``possibly-used-before-assignment`` and reports them as one *changed* message instead of a separate removal + addition. The similarity gate avoids pairing unrelated messages that happen to coincide on the same line (e.g. ``useless-suppression`` removed and ``locally-disabled`` added at the same position).
1 parent 6705d74 commit 6e7d3c9

7 files changed

Lines changed: 159 additions & 18 deletions

File tree

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
The primer now pairs residual messages by ``(symbol, path, obj)`` and reports altered
2-
messages as a single *changed* entry with a compact diff, rather than as a separate
3-
removal + addition. New messages are classified into genuine new messages, fixed false
4-
positives (``useless-suppression``), new false positives (``locally-disabled``), and
5-
``astroid-error`` fatal errors.
1+
The primer now pairs residual messages — first by ``(symbol, path, obj)`` and then by
2+
exact source location — and reports altered messages as a single *changed* entry with
3+
a compact diff, rather than as a separate removal + addition. The location-based pass
4+
also catches symbol renames at the same code position (e.g. ``used-before-assignment``
5+
→ ``possibly-used-before-assignment``). New messages are classified into genuine new
6+
messages, fixed false positives (``useless-suppression``), new false positives
7+
(``locally-disabled``), and ``astroid-error`` fatal errors.
68

79
Refs #10914

pylint/testutils/_primer/comparator.py

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import json
88
from collections import defaultdict
9-
from collections.abc import Iterator
9+
from collections.abc import Callable, Hashable, Iterator
1010
from difflib import SequenceMatcher
1111
from pathlib import Path
1212
from typing import NamedTuple
@@ -34,26 +34,35 @@ class PackageDiff(NamedTuple):
3434
changed: list[ChangedMessage] # same message, altered line / text
3535

3636

37-
def _pair_by_position(
38-
old_residuals: list[JSONMessage], new_residuals: list[JSONMessage]
37+
SYMBOL_RENAME_SIMILARITY_THRESHOLD = 0.5
38+
39+
40+
def _pair_by_key(
41+
old_residuals: list[JSONMessage],
42+
new_residuals: list[JSONMessage],
43+
key: Callable[[JSONMessage], tuple[Hashable, ...]],
44+
accept: Callable[[JSONMessage, JSONMessage], bool] = lambda _o, _n: True,
3945
) -> tuple[list[ChangedMessage], list[JSONMessage], list[JSONMessage]]:
40-
"""Pair residual messages by ``(symbol, path, obj)`` in input order.
46+
"""Pair messages whose ``key`` matches, in input order.
4147
42-
Two messages sharing that key are the "same warning" — if they differ only
48+
Two messages sharing the key are the "same warning" — if they differ only
4349
in line numbers or message text, they should be reported as *changed*
4450
rather than as a separate removal + addition. Leftovers on each side are
45-
genuinely missing or genuinely new.
51+
passed through to the next pass (or reported as missing/new).
52+
53+
``accept`` is an optional predicate that can veto a candidate pair (used to
54+
avoid pairing two unrelated messages that happen to share the key).
4655
"""
47-
new_by_key: dict[tuple[str, str, str], list[JSONMessage]] = defaultdict(list)
56+
new_by_key: dict[tuple[Hashable, ...], list[JSONMessage]] = defaultdict(list)
4857
for m in new_residuals:
49-
new_by_key[m["symbol"], m["path"], m["obj"]].append(m)
58+
new_by_key[key(m)].append(m)
5059

5160
paired: list[ChangedMessage] = []
5261
final_missing: list[JSONMessage] = []
5362
matched_new: set[int] = set()
5463
for old in old_residuals:
55-
bucket = new_by_key.get((old["symbol"], old["path"], old["obj"]))
56-
if bucket:
64+
bucket = new_by_key.get(key(old))
65+
if bucket and accept(old, bucket[0]):
5766
new = bucket.pop(0)
5867
paired.append(ChangedMessage(old=old, new=new))
5968
matched_new.add(id(new))
@@ -63,6 +72,48 @@ def _pair_by_position(
6372
return paired, final_missing, final_new
6473

6574

75+
def _is_symbol_rename(old: JSONMessage, new: JSONMessage) -> bool:
76+
"""True when the two symbols are textually similar enough to be a rename.
77+
78+
Used to avoid pairing unrelated messages that happen to share a source
79+
position (e.g. ``useless-suppression`` removed and ``locally-disabled``
80+
added on the same line — those are conceptually opposite, not a rename).
81+
"""
82+
ratio = SequenceMatcher(None, old["symbol"], new["symbol"]).ratio()
83+
return ratio >= SYMBOL_RENAME_SIMILARITY_THRESHOLD
84+
85+
86+
def _pair_residuals(
87+
old_residuals: list[JSONMessage], new_residuals: list[JSONMessage]
88+
) -> tuple[list[ChangedMessage], list[JSONMessage], list[JSONMessage]]:
89+
"""Pair residual messages with two passes of decreasing strictness.
90+
91+
First by ``(symbol, path, obj)`` — catches line-only or message-only changes
92+
for the same warning. Then by exact source location, gated on symbol
93+
similarity — catches symbol renames such as ``used-before-assignment`` →
94+
``possibly-used-before-assignment``, where the same code position now
95+
emits a renamed message.
96+
"""
97+
paired_by_symbol, missing, new = _pair_by_key(
98+
old_residuals,
99+
new_residuals,
100+
key=lambda m: (m["symbol"], m["path"], m["obj"]),
101+
)
102+
paired_by_location, missing, new = _pair_by_key(
103+
missing,
104+
new,
105+
key=lambda m: (
106+
m["path"],
107+
m["line"],
108+
m["column"],
109+
m.get("endLine"),
110+
m.get("endColumn"),
111+
),
112+
accept=_is_symbol_rename,
113+
)
114+
return paired_by_symbol + paired_by_location, missing, new
115+
116+
66117
def format_span(msg: JSONMessage) -> str:
67118
"""Format a message's location as ``line:col to endLine:endCol``."""
68119
start = f"{msg['line']}:{msg['column']}"
@@ -176,9 +227,10 @@ def __iter__(self) -> Iterator[PackageDiff]:
176227
except ValueError:
177228
residual_old.append(message)
178229

179-
# Second pass: pair residuals by position to detect *changed*
180-
# messages (same warning, different line or text).
181-
paired, final_missing, final_new = _pair_by_position(
230+
# Second pass: pair residuals by symbol then by location to
231+
# detect *changed* messages (same warning, different line/text,
232+
# or a symbol rename at the same source position).
233+
paired, final_missing, final_new = _pair_residuals(
182234
residual_old, pr_messages
183235
)
184236

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
🤖 **Effect of this PR on checked open source code:** 🤖
2+
3+
4+
**Effect on [astroid](https://github.com/pylint-dev/astroid):**
5+
6+
Changed messages:
7+
8+
<details>
9+
10+
1) [possibly-used-before-assignment](https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/wcs.py#L3072):
11+
confidence: `HIGH` → `INFERENCE`
12+
13+
```diff
14+
- Using variable 'orig_key' before assignment
15+
+ Possibly using variable 'orig_key' before assignment
16+
^^^^^^^^^^
17+
```
18+
19+
messageId: `E0601` → `E0606`
20+
21+
symbol: `used-before-assignment` → `possibly-used-before-assignment`
22+
23+
type: `error` → `warning`
24+
</details>
25+
26+
*This comment was generated for commit v2.14.2*
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{ "package": "astroid", "missing": 0, "new": 0, "changed": 1 }]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"astroid": {
3+
"commit": "1234567890abcdef",
4+
"messages": [
5+
{
6+
"type": "error",
7+
"module": "astroid.wcs",
8+
"obj": "WCS.fix",
9+
"line": 3072,
10+
"column": 12,
11+
"endLine": 3072,
12+
"endColumn": 20,
13+
"path": "tests/.pylint_primer_tests/pylint-dev/astroid/astroid/wcs.py",
14+
"symbol": "used-before-assignment",
15+
"message": "Using variable 'orig_key' before assignment",
16+
"messageId": "E0601",
17+
"confidence": "HIGH",
18+
"absolutePath": "tests/.pylint_primer_tests/pylint-dev/astroid/astroid/wcs.py"
19+
}
20+
]
21+
}
22+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"astroid": {
3+
"commit": "123456789abcdef",
4+
"messages": [
5+
{
6+
"type": "warning",
7+
"module": "astroid.wcs",
8+
"obj": "WCS.fix",
9+
"line": 3072,
10+
"column": 12,
11+
"endLine": 3072,
12+
"endColumn": 20,
13+
"path": "tests/.pylint_primer_tests/pylint-dev/astroid/astroid/wcs.py",
14+
"symbol": "possibly-used-before-assignment",
15+
"message": "Possibly using variable 'orig_key' before assignment",
16+
"messageId": "E0606",
17+
"confidence": "INFERENCE",
18+
"absolutePath": "tests/.pylint_primer_tests/pylint-dev/astroid/astroid/wcs.py"
19+
}
20+
]
21+
}
22+
}

tests/testutils/_primer/test_comparator.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pylint.testutils._primer.comparator import (
1313
Comparator,
1414
_caret_hint,
15+
_is_symbol_rename,
1516
format_span,
1617
)
1718

@@ -67,6 +68,21 @@ def test_caret_hint_returns_empty_when_mostly_changed() -> None:
6768
assert _caret_hint("hello world", "goodbye universe") == ""
6869

6970

71+
@pytest.mark.parametrize(
72+
("old_symbol", "new_symbol", "expected"),
73+
[
74+
("used-before-assignment", "possibly-used-before-assignment", True),
75+
("abstract-method", "no-abstract-method", True),
76+
("useless-suppression", "locally-disabled", False),
77+
("too-complex", "too-many-locals", False),
78+
],
79+
)
80+
def test_is_symbol_rename(old_symbol: str, new_symbol: str, expected: bool) -> None:
81+
old = _msg(symbol=old_symbol)
82+
new = _msg(symbol=new_symbol)
83+
assert _is_symbol_rename(old, new) is expected
84+
85+
7086
def test_comparator_batched() -> None:
7187
fixture = Path(__file__).parent / "batched_cases"
7288
comparator = Comparator.from_json(

0 commit comments

Comments
 (0)