Skip to content

Commit 6798ab4

Browse files
Smarter primer diff: detect changed messages via position-based pairing
Instead of reporting moved or reworded diagnostics as separate removal + addition, pair residual messages by (symbol, path, obj) and show a compact diff of what changed (line, message text, etc.). Also: move test fixtures from cases/ to fixtures/, add line_moved and message_changed_line test scenarios, and update truncation expected files for the </details> closing-tag fix.
1 parent b8279be commit 6798ab4

45 files changed

Lines changed: 413 additions & 140 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

pylint/testutils/_primer/comparator.py

Lines changed: 155 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,65 +5,175 @@
55
from __future__ import annotations
66

77
import json
8-
from collections.abc import Generator
8+
from collections import defaultdict
9+
from collections.abc import Iterator
910
from pathlib import Path
1011

1112
from pylint.reporters.json_reporter import OldJsonExport
12-
from pylint.testutils._primer.primer_command import (
13-
PackageData,
14-
PackageMessages,
15-
)
13+
from pylint.testutils._primer.primer_command import PackageMessages
1614

15+
ChangedMessage = tuple[OldJsonExport, OldJsonExport] # (old, new)
1716

18-
class Comparator:
19-
def __init__(
20-
self, base_file: str, new_file: str, batches: int | None = None
21-
) -> None:
22-
self._base_file = base_file
23-
self._new_file = new_file
24-
self._batches = batches
25-
26-
def __iter__(self) -> Generator[tuple[str, PackageData, PackageData]]:
27-
main_data: PackageMessages
28-
if self._batches is None:
29-
main_data = self._load_json(self._base_file)
30-
pr_data = self._load_json(self._new_file)
17+
_LOCATION_KEYS = {"line", "column", "endLine", "endColumn"}
18+
19+
20+
def _position_key(msg: OldJsonExport) -> tuple[str, str, str]:
21+
"""Key that identifies a diagnostic independently of its text or location.
22+
23+
Two messages that share (symbol, path, obj) are the "same diagnostic" — if
24+
they differ only in line numbers or message text, they should be reported as
25+
*changed* rather than as a separate removal + addition.
26+
"""
27+
return (msg["symbol"], msg["path"], msg["obj"])
28+
29+
30+
def _match_residuals(
31+
old_residuals: list[OldJsonExport], new_residuals: list[OldJsonExport]
32+
) -> tuple[list[ChangedMessage], list[OldJsonExport], list[OldJsonExport]]:
33+
"""Pair residual messages by position key ``(symbol, path, obj)``.
34+
35+
Messages that share the same key are paired 1:1 in order. Any left-over
36+
messages remain as genuinely missing or genuinely new.
37+
38+
Returns ``(paired, unmatched_old, unmatched_new)``.
39+
"""
40+
old_by_key: dict[tuple[str, str, str], list[OldJsonExport]] = defaultdict(list)
41+
new_by_key: dict[tuple[str, str, str], list[OldJsonExport]] = defaultdict(list)
42+
for m in old_residuals:
43+
old_by_key[_position_key(m)].append(m)
44+
for m in new_residuals:
45+
new_by_key[_position_key(m)].append(m)
46+
47+
paired: list[ChangedMessage] = []
48+
paired_old_ids: set[int] = set()
49+
paired_new_ids: set[int] = set()
50+
for key in old_by_key:
51+
if key not in new_by_key:
52+
continue
53+
for old, new in zip(old_by_key[key], new_by_key[key]):
54+
paired.append((old, new))
55+
paired_old_ids.add(id(old))
56+
paired_new_ids.add(id(new))
57+
58+
final_missing = [m for m in old_residuals if id(m) not in paired_old_ids]
59+
final_new = [m for m in new_residuals if id(m) not in paired_new_ids]
60+
return paired, final_missing, final_new
61+
62+
63+
def format_span(msg: OldJsonExport) -> str:
64+
"""Format a message's location as ``line:col to endLine:endCol``."""
65+
start = f"{msg['line']}:{msg['column']}"
66+
end_line = msg.get("endLine")
67+
end_col = msg.get("endColumn")
68+
if end_line is not None and end_col is not None:
69+
return f"{start} to {end_line}:{end_col}"
70+
return start
71+
72+
73+
def message_diff(old: OldJsonExport, new: OldJsonExport) -> str:
74+
"""Return a compact summary of changed fields between two messages.
75+
76+
Location changes are merged into a single human-readable span.
77+
String fields (like ``message``) get a ``diff`` code block so GitHub
78+
renders them with red/green highlighting.
79+
"""
80+
changed_keys: set[str] = set()
81+
for key in old:
82+
if old[key] != new[key]: # type: ignore[literal-required]
83+
changed_keys.add(key)
84+
85+
parts: list[str] = []
86+
# Location: combine line/column/endLine/endColumn into one sentence.
87+
if changed_keys & _LOCATION_KEYS:
88+
parts.append(f"Was raised on {format_span(old)}, now on {format_span(new)}.")
89+
90+
# Other fields (typically ``message``).
91+
for key in sorted(changed_keys - _LOCATION_KEYS):
92+
old_val = old[key] # type: ignore[literal-required]
93+
new_val = new[key] # type: ignore[literal-required]
94+
if isinstance(old_val, str) and isinstance(new_val, str):
95+
parts.append(f"The {key} changed:\n```diff\n- {old_val}\n+ {new_val}\n```")
3196
else:
32-
main_data = {}
33-
pr_data = {}
34-
for idx in range(self._batches):
35-
main_data.update(
36-
self._load_json(
37-
self._base_file.replace("BATCHIDX", "batch" + str(idx))
38-
)
39-
)
40-
pr_data.update(
41-
self._load_json(
42-
self._new_file.replace("BATCHIDX", "batch" + str(idx))
43-
)
44-
)
97+
parts.append(f"{key}: {old_val!r} -> {new_val!r}")
98+
return "\n".join(parts)
99+
100+
101+
class Comparator:
102+
"""Cross-reference two primer JSON outputs and iterate over differences.
103+
104+
Yields ``(package, missing, new, changed)`` for each package that has at
105+
least one difference. *changed* contains pairs of ``(old, new)`` messages
106+
that are the same diagnostic but with altered details (line, message text,
107+
etc.).
108+
"""
109+
110+
def __init__(self, main_data: PackageMessages, pr_data: PackageMessages) -> None:
111+
self.missing_messages: dict[str, list[OldJsonExport]] = {}
112+
self.new_messages: dict[str, list[OldJsonExport]] = {}
113+
self.changed_messages: dict[str, list[ChangedMessage]] = {}
114+
self.commits: dict[str, str] = {}
45115

46-
missing_messages: PackageMessages = {}
47116
for package, data in main_data.items():
48-
package_missing_messages: list[OldJsonExport] = []
117+
self.commits[package] = pr_data[package]["commit"]
118+
# First pass: exact-match removal.
119+
residual_old: list[OldJsonExport] = []
49120
for message in data["messages"]:
50121
try:
51122
pr_data[package]["messages"].remove(message)
52123
except ValueError:
53-
package_missing_messages.append(message)
54-
missing_messages[package] = PackageData(
55-
commit=pr_data[package]["commit"],
56-
messages=package_missing_messages,
124+
residual_old.append(message)
125+
126+
# Second pass: pair residuals by position to detect *changed*
127+
# messages (same diagnostic, different line or text).
128+
paired, final_missing, final_new = _match_residuals(
129+
residual_old, pr_data[package]["messages"]
57130
)
58131

59-
for package, pkg_missing in missing_messages.items():
60-
new_messages = pr_data[package]
61-
if not pkg_missing["messages"] and not new_messages["messages"]:
132+
self.missing_messages[package] = final_missing
133+
self.new_messages[package] = final_new
134+
self.changed_messages[package] = paired
135+
136+
def __iter__(
137+
self,
138+
) -> Iterator[
139+
tuple[
140+
str,
141+
list[OldJsonExport],
142+
list[OldJsonExport],
143+
list[ChangedMessage],
144+
]
145+
]:
146+
for package, missing in self.missing_messages.items():
147+
new = self.new_messages[package]
148+
changed = self.changed_messages[package]
149+
if not missing and not new and not changed:
62150
continue
63-
yield package, pkg_missing, new_messages
151+
yield package, missing, new, changed
64152

65153
@staticmethod
66-
def _load_json(file_path: Path | str) -> PackageMessages:
67-
with open(file_path, encoding="utf-8") as f:
68-
result: PackageMessages = json.load(f)
69-
return result
154+
def from_json(
155+
base_file: str, new_file: str, batches: int | None = None
156+
) -> Comparator:
157+
"""Build a Comparator from JSON file paths, handling batched runs."""
158+
main_data: PackageMessages
159+
pr_data: PackageMessages
160+
if batches is None:
161+
main_data = _load_json(base_file)
162+
pr_data = _load_json(new_file)
163+
else:
164+
main_data = {}
165+
pr_data = {}
166+
for idx in range(batches):
167+
main_data.update(
168+
_load_json(base_file.replace("BATCHIDX", "batch" + str(idx)))
169+
)
170+
pr_data.update(
171+
_load_json(new_file.replace("BATCHIDX", "batch" + str(idx)))
172+
)
173+
return Comparator(main_data, pr_data)
174+
175+
176+
def _load_json(file_path: Path | str) -> PackageMessages:
177+
with open(file_path, encoding="utf-8") as f:
178+
result: PackageMessages = json.load(f)
179+
return result

pylint/testutils/_primer/primer_compare_command.py

Lines changed: 58 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,20 @@
55

66
from pathlib import PurePosixPath
77

8-
from pylint.testutils._primer.comparator import Comparator
9-
from pylint.testutils._primer.primer_command import PackageData, PrimerCommand
8+
from pylint.reporters.json_reporter import OldJsonExport
9+
from pylint.testutils._primer.comparator import (
10+
ChangedMessage,
11+
Comparator,
12+
message_diff,
13+
)
14+
from pylint.testutils._primer.primer_command import PrimerCommand
1015

1116
MAX_GITHUB_COMMENT_LENGTH = 65536
1217

1318

1419
class CompareCommand(PrimerCommand):
1520
def run(self) -> None:
16-
comparator = Comparator(
21+
comparator = Comparator.from_json(
1722
self.config.base_file, self.config.new_file, self.config.batches
1823
)
1924
comment = self._create_comment(comparator)
@@ -22,11 +27,15 @@ def run(self) -> None:
2227

2328
def _create_comment(self, comparator: Comparator) -> str:
2429
comment = ""
25-
for package, missing_messages, new_messages in comparator:
30+
for package, missing_messages, new_messages, changed_messages in comparator:
2631
if len(comment) >= MAX_GITHUB_COMMENT_LENGTH:
2732
break
2833
comment += self._create_comment_for_package(
29-
package, new_messages, missing_messages
34+
package,
35+
missing_messages,
36+
new_messages,
37+
changed_messages,
38+
comparator.commits[package],
3039
)
3140
comment = (
3241
f"🤖 **Effect of this PR on checked open source code:** 🤖\n\n{comment}"
@@ -39,32 +48,52 @@ def _create_comment(self, comparator: Comparator) -> str:
3948
return self._truncate_comment(comment)
4049

4150
def _create_comment_for_package(
42-
self, package: str, new_messages: PackageData, missing_messages: PackageData
51+
self,
52+
package: str,
53+
missing_messages: list[OldJsonExport],
54+
new_messages: list[OldJsonExport],
55+
changed_messages: list[ChangedMessage],
56+
commit: str,
4357
) -> str:
44-
comment = f"\n**Effect on [{package}]({self.packages[package].url}):**\n\n"
45-
# Create comment for new messages
46-
count = 1
58+
url = self.packages[package].url
59+
clone_dir = self.packages[package].clone_directory
60+
61+
assert not url.endswith(
62+
".git"
63+
), "You don't need the .git at the end of the github url."
64+
65+
def _source_link(msg: OldJsonExport) -> str:
66+
filepath = str(PurePosixPath(msg["path"]).relative_to(clone_dir))
67+
return f"{url}/blob/{commit}/{filepath}#L{msg['line']}"
68+
69+
comment = f"\n**Effect on [{package}]({url}):**\n\n"
70+
71+
# -- Changed messages (same diagnostic, different details) -----------
72+
if changed_messages:
73+
comment += "The following messages have been changed:\n\n<details>\n"
74+
for count, (old, new) in enumerate(changed_messages, 1):
75+
comment += (
76+
f"{count}) {new['symbol']}:\n"
77+
f"{message_diff(old, new)}\n"
78+
f"{_source_link(new)}\n"
79+
)
80+
comment += "</details>\n\n"
81+
82+
# -- New messages ----------------------------------------------------
4783
astroid_errors = 0
4884
new_non_astroid_messages = ""
49-
if new_messages["messages"]:
50-
print("Now emitted:")
51-
for message in new_messages["messages"]:
52-
filepath = str(
53-
PurePosixPath(message["path"]).relative_to(
54-
self.packages[package].clone_directory
55-
)
56-
)
57-
# Existing astroid errors may still show up as "new" because the timestamp
58-
# in the message is slightly different.
85+
count = 0
86+
for message in new_messages:
87+
# Existing astroid errors may still show up as "new" because the
88+
# timestamp in the message is slightly different.
5989
if message["symbol"] == "astroid-error":
6090
astroid_errors += 1
6191
else:
92+
count += 1
6293
new_non_astroid_messages += (
6394
f"{count}) {message['symbol']}:\n*{message['message']}*\n"
64-
f"{self.packages[package].url}/blob/{new_messages['commit']}/{filepath}#L{message['line']}\n"
95+
f"{_source_link(message)}\n"
6596
)
66-
print(message)
67-
count += 1
6897

6998
if astroid_errors:
7099
comment += (
@@ -78,29 +107,16 @@ def _create_comment_for_package(
78107
+ "</details>\n\n"
79108
)
80109

81-
# Create comment for missing messages
82-
count = 1
83-
if missing_messages["messages"]:
110+
# -- Missing messages ------------------------------------------------
111+
if missing_messages:
84112
comment += "The following messages are no longer emitted:\n\n<details>\n"
85-
print("No longer emitted:")
86-
for message in missing_messages["messages"]:
87-
comment += f"{count}) {message['symbol']}:\n*{message['message']}*\n"
88-
filepath = str(
89-
PurePosixPath(message["path"]).relative_to(
90-
self.packages[package].clone_directory
113+
for count, message in enumerate(missing_messages, 1):
114+
comment += (
115+
f"{count}) {message['symbol']}:\n*{message['message']}*\n"
116+
f"{_source_link(message)}\n"
91117
)
92-
)
93-
assert not self.packages[package].url.endswith(
94-
".git"
95-
), "You don't need the .git at the end of the github url."
96-
comment += (
97-
f"{self.packages[package].url}"
98-
f"/blob/{new_messages['commit']}/{filepath}#L{message['line']}\n"
99-
)
100-
count += 1
101-
print(message)
102-
if missing_messages["messages"]:
103118
comment += "</details>\n\n"
119+
104120
return comment
105121

106122
def _truncate_comment(self, comment: str) -> str:

tests/testutils/_primer/batched_cases/expected_comparator.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/testutils/_primer/cases/message_changed/expected.txt

Lines changed: 0 additions & 22 deletions
This file was deleted.

tests/testutils/_primer/cases/message_changed/expected_comparator.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/testutils/_primer/cases/new_message/expected_comparator.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)