Skip to content

Commit b74faa8

Browse files
[primer] Detect changed messages via position-based pairing
Instead of reporting a moved or reworded diagnostic as a separate removal + addition, pair messages that share the same (symbol, path, obj) key and report them as "changed" with a human-readable diff. Comparator.__iter__ now yields 4-tuples (package, missing, new, changed) and exposes a commits dict. Short field changes (type, confidence) use an inline format; long message strings get a diff block with caret hints highlighting the changed spans.
1 parent 2d941ea commit b74faa8

24 files changed

Lines changed: 270 additions & 160 deletions

pylint/testutils/_primer/comparator.py

Lines changed: 143 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,146 @@
55
from __future__ import annotations
66

77
import json
8-
from collections.abc import Generator
8+
from collections import defaultdict
9+
from collections.abc import Iterator
10+
from difflib import SequenceMatcher
911
from pathlib import Path
1012

1113
from pylint.reporters.json_reporter import JSONMessage
12-
from pylint.testutils._primer.primer_command import (
13-
PackageData,
14-
PackageMessages,
15-
)
14+
from pylint.testutils._primer.primer_command import PackageMessages
15+
16+
ChangedMessage = tuple[JSONMessage, JSONMessage] # (old, new)
17+
PackageDiff = tuple[str, list[JSONMessage], list[JSONMessage], list[ChangedMessage]]
18+
19+
_LOCATION_KEYS = {"line", "column", "endLine", "endColumn"}
20+
21+
22+
def _position_key(msg: JSONMessage) -> tuple[str, str, str]:
23+
"""Key that identifies a diagnostic independently of its text or location.
24+
25+
Two messages that share (symbol, path, obj) are the "same diagnostic" — if
26+
they differ only in line numbers or message text, they should be reported as
27+
*changed* rather than as a separate removal + addition.
28+
"""
29+
return (msg["symbol"], msg["path"], msg["obj"])
30+
31+
32+
def _match_residuals(
33+
old_residuals: list[JSONMessage], new_residuals: list[JSONMessage]
34+
) -> tuple[list[ChangedMessage], list[JSONMessage], list[JSONMessage]]:
35+
"""Pair residual messages by position key ``(symbol, path, obj)``.
36+
37+
Messages that share the same key are paired 1:1 in order. Any left-over
38+
messages remain as genuinely missing or genuinely new.
39+
40+
Returns ``(paired, unmatched_old, unmatched_new)``.
41+
"""
42+
old_by_key: dict[tuple[str, str, str], list[JSONMessage]] = defaultdict(list)
43+
new_by_key: dict[tuple[str, str, str], list[JSONMessage]] = defaultdict(list)
44+
for m in old_residuals:
45+
old_by_key[_position_key(m)].append(m)
46+
for m in new_residuals:
47+
new_by_key[_position_key(m)].append(m)
48+
49+
paired: list[ChangedMessage] = []
50+
paired_old_ids: set[int] = set()
51+
paired_new_ids: set[int] = set()
52+
for key in old_by_key:
53+
if key not in new_by_key:
54+
continue
55+
for old, new in zip(old_by_key[key], new_by_key[key]):
56+
paired.append((old, new))
57+
paired_old_ids.add(id(old))
58+
paired_new_ids.add(id(new))
59+
60+
final_missing = [m for m in old_residuals if id(m) not in paired_old_ids]
61+
final_new = [m for m in new_residuals if id(m) not in paired_new_ids]
62+
return paired, final_missing, final_new
63+
64+
65+
def format_span(msg: JSONMessage) -> str:
66+
"""Format a message's location as ``line:col to endLine:endCol``."""
67+
start = f"{msg['line']}:{msg['column']}"
68+
end_line = msg.get("endLine")
69+
end_col = msg.get("endColumn")
70+
if end_line is not None and end_col is not None:
71+
return f"from `{start}` to `{end_line}:{end_col}`"
72+
return f"at `{start}`"
73+
74+
75+
def message_diff(old: JSONMessage, new: JSONMessage) -> str:
76+
"""Return a compact summary of changed fields between two messages.
77+
78+
Location changes are merged into a single human-readable span.
79+
String fields (like ``message``) get a ``diff`` code block so GitHub
80+
renders them with red/green highlighting.
81+
"""
82+
changed_keys: set[str] = set()
83+
for key in old:
84+
if old[key] != new[key]: # type: ignore[literal-required]
85+
changed_keys.add(key)
86+
87+
parts: list[str] = []
88+
# Location: combine line/column/endLine/endColumn into one sentence.
89+
if changed_keys & _LOCATION_KEYS:
90+
parts.append(f"Was raised {format_span(old)}, now {format_span(new)}.")
91+
92+
# Other fields (typically ``message`` or ``type``).
93+
for key in sorted(changed_keys - _LOCATION_KEYS):
94+
old_val = old[key] # type: ignore[literal-required]
95+
new_val = new[key] # type: ignore[literal-required]
96+
if (
97+
isinstance(old_val, str)
98+
and isinstance(new_val, str)
99+
and (len(old_val) > 40 or len(new_val) > 40)
100+
):
101+
caret_line = _caret_hint(old_val, new_val)
102+
diff_block = f"```diff\n- {old_val}\n+ {new_val}\n"
103+
if caret_line:
104+
diff_block += f" {caret_line}\n"
105+
diff_block += "```"
106+
parts.append(diff_block)
107+
else:
108+
parts.append(f"{key}: `{old_val}` → `{new_val}`")
109+
return "\n\n".join(parts)
110+
111+
112+
def _caret_hint(old: str, new: str) -> str:
113+
"""Build a ``^`` marker line highlighting changed spans in *new*.
114+
115+
Uses SequenceMatcher to find which parts of *new* differ from *old*
116+
and places ``^`` characters under them (aligned with the ``+ `` prefix).
117+
Returns an empty string when the whole line changed (carets wouldn't help).
118+
"""
119+
matcher = SequenceMatcher(None, old, new)
120+
carets = [" "] * len(new)
121+
changed_count = 0
122+
for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
123+
if tag != "equal":
124+
for j in range(j1, j2):
125+
carets[j] = "^"
126+
changed_count += j2 - j1
127+
# If most of the string changed, carets are just noise.
128+
if changed_count > len(new) * 0.6:
129+
return ""
130+
return "".join(carets).rstrip()
16131

17132

18133
class Comparator:
19-
"""Cross-reference two primer JSON outputs and iterate over differences."""
134+
"""Cross-reference two primer JSON outputs and iterate over differences.
135+
136+
Yields ``(package, missing, new, changed)`` for each package that has at
137+
least one difference. *changed* contains pairs of ``(old, new)`` messages
138+
that are the same diagnostic but with altered details (line, message text,
139+
etc.).
140+
"""
20141

21142
def __init__(self, main_data: PackageMessages, pr_data: PackageMessages) -> None:
22143
self._main_data = main_data
23144
self._pr_data = pr_data
145+
self.commits: dict[str, str] = {
146+
pkg: data["commit"] for pkg, data in pr_data.items()
147+
}
24148

25149
@staticmethod
26150
def from_json(
@@ -45,28 +169,29 @@ def from_json(
45169
)
46170
return Comparator(main_data, pr_data)
47171

48-
def __iter__(self) -> Generator[tuple[str, PackageData, PackageData]]:
172+
def __iter__(self) -> Iterator[PackageDiff]:
49173
main_data = self._main_data
50174
pr_data = self._pr_data
51175

52-
missing_messages: PackageMessages = {}
53176
for package, data in main_data.items():
54-
package_missing_messages: list[JSONMessage] = []
177+
# First pass: exact-match removal.
178+
pr_messages = list(pr_data[package]["messages"])
179+
residual_old: list[JSONMessage] = []
55180
for message in data["messages"]:
56181
try:
57-
pr_data[package]["messages"].remove(message)
182+
pr_messages.remove(message)
58183
except ValueError:
59-
package_missing_messages.append(message)
60-
missing_messages[package] = PackageData(
61-
commit=pr_data[package]["commit"],
62-
messages=package_missing_messages,
184+
residual_old.append(message)
185+
186+
# Second pass: pair residuals by position to detect *changed*
187+
# messages (same diagnostic, different line or text).
188+
paired, final_missing, final_new = _match_residuals(
189+
residual_old, pr_messages
63190
)
64191

65-
for package, pkg_missing in missing_messages.items():
66-
new_messages = pr_data[package]
67-
if not pkg_missing["messages"] and not new_messages["messages"]:
192+
if not final_missing and not final_new and not paired:
68193
continue
69-
yield package, pkg_missing, new_messages
194+
yield package, final_missing, final_new, paired
70195

71196
@staticmethod
72197
def _load_json(file_path: Path | str) -> PackageMessages:

pylint/testutils/_primer/primer_compare_command.py

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,13 @@
55

66
from pathlib import PurePosixPath
77

8-
from pylint.testutils._primer.comparator import Comparator
9-
from pylint.testutils._primer.primer_command import PackageData, PrimerCommand
8+
from pylint.reporters.json_reporter import JSONMessage
9+
from pylint.testutils._primer.comparator import (
10+
ChangedMessage,
11+
Comparator,
12+
message_diff,
13+
)
14+
from pylint.testutils._primer.primer_command import PrimerCommand
1015

1116
MAX_GITHUB_COMMENT_LENGTH = 65536
1217

@@ -22,11 +27,15 @@ def run(self) -> None:
2227

2328
def _create_comment(self, comparator: Comparator) -> str:
2429
comment = ""
25-
for package, missing_messages, new_messages in comparator:
30+
for package, missing_messages, new_messages, changed_messages in comparator:
2631
if len(comment) >= MAX_GITHUB_COMMENT_LENGTH:
2732
break
2833
comment += self._create_comment_for_package(
29-
package, new_messages, missing_messages
34+
package,
35+
missing_messages,
36+
new_messages,
37+
changed_messages,
38+
comparator.commits[package],
3039
)
3140
comment = (
3241
f"🤖 **Effect of this PR on checked open source code:** 🤖\n\n{comment}"
@@ -39,29 +48,51 @@ def _create_comment(self, comparator: Comparator) -> str:
3948
return self._truncate_comment(comment)
4049

4150
def _create_comment_for_package(
42-
self, package: str, new_messages: PackageData, missing_messages: PackageData
51+
self,
52+
package: str,
53+
missing_messages: list[JSONMessage],
54+
new_messages: list[JSONMessage],
55+
changed_messages: list[ChangedMessage],
56+
commit: str,
4357
) -> str:
44-
comment = f"\n**Effect on [{package}]({self.packages[package].url}):**\n\n"
45-
# Create comment for new messages
58+
url = self.packages[package].url
59+
clone_dir = self.packages[package].clone_directory
60+
61+
assert not url.endswith(
62+
".git"
63+
), "You don't need the .git at the end of the github url."
64+
65+
def _source_link(msg: JSONMessage) -> str:
66+
filepath = str(PurePosixPath(msg["path"]).relative_to(clone_dir))
67+
return f"{url}/blob/{commit}/{filepath}#L{msg['line']}"
68+
69+
comment = f"\n**Effect on [{package}]({url}):**\n\n"
70+
71+
# Changed messages
72+
if changed_messages:
73+
print("Changed:")
74+
comment += "The following messages have been changed:\n\n<details>\n\n"
75+
for count, (old, new) in enumerate(changed_messages, 1):
76+
comment += (
77+
f"{count}) [{new['symbol']}]({_source_link(new)}):\n"
78+
f"{message_diff(old, new)}\n"
79+
)
80+
print(new)
81+
comment += "</details>\n\n"
82+
83+
# New messages
4684
count = 1
4785
astroid_errors = 0
4886
new_non_astroid_messages = ""
49-
if new_messages["messages"]:
87+
if new_messages:
5088
print("Now emitted:")
51-
for message in new_messages["messages"]:
52-
filepath = str(
53-
PurePosixPath(message["path"]).relative_to(
54-
self.packages[package].clone_directory
55-
)
56-
)
57-
# Existing astroid errors may still show up as "new" because the timestamp
58-
# in the message is slightly different.
89+
for message in new_messages:
5990
if message["symbol"] == "astroid-error":
6091
astroid_errors += 1
6192
else:
6293
new_non_astroid_messages += (
6394
f"{count}) {message['symbol']}:\n*{message['message']}*\n"
64-
f"{self.packages[package].url}/blob/{new_messages['commit']}/{filepath}#L{message['line']}\n"
95+
f"{_source_link(message)}\n"
6596
)
6697
print(message)
6798
count += 1
@@ -78,28 +109,19 @@ def _create_comment_for_package(
78109
+ "</details>\n\n"
79110
)
80111

81-
# Create comment for missing messages
112+
# Missing messages
82113
count = 1
83-
if missing_messages["messages"]:
114+
if missing_messages:
84115
comment += "The following messages are no longer emitted:\n\n<details>\n\n"
85116
print("No longer emitted:")
86-
for message in missing_messages["messages"]:
87-
comment += f"{count}) {message['symbol']}:\n*{message['message']}*\n"
88-
filepath = str(
89-
PurePosixPath(message["path"]).relative_to(
90-
self.packages[package].clone_directory
91-
)
92-
)
93-
assert not self.packages[package].url.endswith(
94-
".git"
95-
), "You don't need the .git at the end of the github url."
117+
for message in missing_messages:
96118
comment += (
97-
f"{self.packages[package].url}"
98-
f"/blob/{new_messages['commit']}/{filepath}#L{message['line']}\n"
119+
f"{count}) {message['symbol']}:\n*{message['message']}*\n"
120+
f"{_source_link(message)}\n"
99121
)
100122
count += 1
101123
print(message)
102-
if missing_messages["messages"]:
124+
if missing_messages:
103125
comment += "</details>\n\n"
104126
return comment
105127

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{ "package": "astroid", "missing": 2, "new": 0 }]
1+
[{ "package": "astroid", "missing": 2, "new": 0, "changed": 0 }]

tests/testutils/_primer/cases/confidence_changed/expected.txt

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,12 @@
33

44
**Effect on [astroid](https://github.com/pylint-dev/astroid):**
55

6-
The following messages are now emitted:
6+
The following messages have been changed:
77

88
<details>
99

10-
1) dangerous-default-value:
11-
*Dangerous default value [] as argument*
12-
https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/inference.py#L42
13-
</details>
14-
15-
The following messages are no longer emitted:
16-
17-
<details>
18-
19-
1) dangerous-default-value:
20-
*Dangerous default value [] as argument*
21-
https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/inference.py#L42
10+
1) [dangerous-default-value](https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/inference.py#L42):
11+
confidence: `UNDEFINED` → `HIGH`
2212
</details>
2313

2414
*This comment was generated for commit v2.14.2*
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{ "package": "astroid", "missing": 1, "new": 1 }]
1+
[{ "package": "astroid", "missing": 0, "new": 0, "changed": 1 }]

tests/testutils/_primer/cases/line_moved/expected.txt

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,12 @@
33

44
**Effect on [astroid](https://github.com/pylint-dev/astroid):**
55

6-
The following messages are now emitted:
6+
The following messages have been changed:
77

88
<details>
99

10-
1) too-complex:
11-
*'my_function' is too complex. The McCabe rating is 18*
12-
https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/node_classes.py#L103
13-
</details>
14-
15-
The following messages are no longer emitted:
16-
17-
<details>
18-
19-
1) too-complex:
20-
*'my_function' is too complex. The McCabe rating is 18*
21-
https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/node_classes.py#L100
10+
1) [too-complex](https://github.com/pylint-dev/astroid/blob/123456789abcdef/astroid/node_classes.py#L103):
11+
Was raised from `100:0` to `100:15`, now from `103:0` to `103:15`.
2212
</details>
2313

2414
*This comment was generated for commit v2.14.2*
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{ "package": "astroid", "missing": 1, "new": 1 }]
1+
[{ "package": "astroid", "missing": 0, "new": 0, "changed": 1 }]

0 commit comments

Comments
 (0)