55from __future__ import annotations
66
77import json
8- from collections .abc import Generator
8+ from collections import defaultdict
9+ from collections .abc import Iterator
10+ from difflib import SequenceMatcher
911from pathlib import Path
1012
1113from pylint .reporters .json_reporter import JSONMessage
12- from pylint .testutils ._primer .primer_command import (
13- PackageData ,
14- PackageMessages ,
15- )
14+ from pylint .testutils ._primer .primer_command import PackageMessages
15+
# A diagnostic present on both sides whose details differ: (old, new).
ChangedMessage = tuple[JSONMessage, JSONMessage]
# One package's comparison result: (package, missing, new, changed).
PackageDiff = tuple[str, list[JSONMessage], list[JSONMessage], list[ChangedMessage]]

# Message fields that describe *where* a diagnostic was raised.  Frozen so
# this module-level constant cannot be mutated by accident; ``&`` and ``-``
# against a regular ``set`` keep working unchanged.
_LOCATION_KEYS = frozenset({"line", "column", "endLine", "endColumn"})
20+
21+
def _position_key(msg: JSONMessage) -> tuple[str, str, str]:
    """Return the ``(symbol, path, obj)`` identity of a diagnostic.

    The key deliberately leaves out the message text and every location
    field: two messages with the same key are treated as the "same
    diagnostic", so a difference in line numbers or wording is reported as a
    *change* instead of an unrelated removal plus addition.
    """
    symbol, path, obj = msg["symbol"], msg["path"], msg["obj"]
    return symbol, path, obj
30+
31+
def _match_residuals(
    old_residuals: list[JSONMessage], new_residuals: list[JSONMessage]
) -> tuple[list[ChangedMessage], list[JSONMessage], list[JSONMessage]]:
    """Pair up left-over messages that share a ``(symbol, path, obj)`` key.

    Within one key, old and new messages are paired 1:1 in list order; when
    one side has more messages than the other, the surplus stays unpaired.

    Returns ``(paired, unmatched_old, unmatched_new)``.  *paired* is grouped
    by key, in order of each key's first appearance in *old_residuals*; the
    two unmatched lists keep the order of their respective inputs.
    """
    grouped_old: dict[tuple[str, str, str], list[JSONMessage]] = defaultdict(list)
    grouped_new: dict[tuple[str, str, str], list[JSONMessage]] = defaultdict(list)
    for bucket, messages in (
        (grouped_old, old_residuals),
        (grouped_new, new_residuals),
    ):
        for message in messages:
            bucket[_position_key(message)].append(message)

    # ``zip`` stops at the shorter side, which leaves the surplus unpaired.
    paired: list[ChangedMessage] = [
        pair
        for key, old_group in grouped_old.items()
        if key in grouped_new
        for pair in zip(old_group, grouped_new[key])
    ]

    # Messages are dicts (unhashable), so track what was consumed by identity.
    consumed_old = {id(old_msg) for old_msg, _ in paired}
    consumed_new = {id(new_msg) for _, new_msg in paired}

    unmatched_old = [m for m in old_residuals if id(m) not in consumed_old]
    unmatched_new = [m for m in new_residuals if id(m) not in consumed_new]
    return paired, unmatched_old, unmatched_new
63+
64+
def format_span(msg: JSONMessage) -> str:
    """Describe where *msg* was raised, using Markdown inline code.

    Returns ``from `line:column` to `endLine:endColumn` `` when both end
    coordinates are present and not ``None``; otherwise ``at `line:column` ``.
    """
    begin = f"{msg['line']}:{msg['column']}"
    end_line, end_col = msg.get("endLine"), msg.get("endColumn")
    if end_line is None or end_col is None:
        return f"at `{begin}`"
    return f"from `{begin}` to `{end_line}:{end_col}`"
73+
74+
def message_diff(old: JSONMessage, new: JSONMessage) -> str:
    """Return a compact Markdown summary of the fields that differ.

    Fields are compared over the union of both messages' keys.  A field that
    exists on only one side compares against ``None``, so it is reported as a
    change instead of raising ``KeyError`` (missing from *new*) or being
    silently skipped (missing from *old*).

    Location changes (``line``/``column``/``endLine``/``endColumn``) are
    merged into a single human-readable sentence.  String fields where either
    value is longer than 40 characters get a ``diff`` code block so GitHub
    renders them with red/green highlighting; every other field is rendered
    inline as ``key: `old` → `new```.

    Returns the parts separated by blank lines, or an empty string when
    nothing differs.
    """
    changed_keys: set[str] = {
        key for key in old.keys() | new.keys() if old.get(key) != new.get(key)
    }

    parts: list[str] = []
    # Location: combine line/column/endLine/endColumn into one sentence.
    if changed_keys & _LOCATION_KEYS:
        parts.append(f"Was raised {format_span(old)}, now {format_span(new)}.")

    # Other fields (typically ``message`` or ``type``), in a stable order.
    for key in sorted(changed_keys - _LOCATION_KEYS):
        old_val = old.get(key)
        new_val = new.get(key)
        if (
            isinstance(old_val, str)
            and isinstance(new_val, str)
            and (len(old_val) > 40 or len(new_val) > 40)
        ):
            caret_line = _caret_hint(old_val, new_val)
            diff_block = f"```diff\n- {old_val}\n+ {new_val}\n"
            if caret_line:
                # Two leading spaces line the carets up under the text that
                # follows the "+ " prefix of the new value.
                diff_block += f"  {caret_line}\n"
            diff_block += "```"
            parts.append(diff_block)
        else:
            parts.append(f"{key}: `{old_val}` → `{new_val}`")
    return "\n\n".join(parts)
110+
111+
def _caret_hint(old: str, new: str) -> str:
    """Build a ``^`` marker line highlighting changed spans in *new*.

    Uses SequenceMatcher to find which parts of *new* differ from *old* and
    places ``^`` characters under them (one marker column per character of
    *new*; the caller adds the prefix that aligns it with the ``+ `` line).
    Text that was only deleted from *old* has no position in *new* and
    therefore gets no marker.

    Returns an empty string when nothing in *new* is marked, or when more
    than 60% of *new* changed (carets would just be noise).
    """
    # autojunk=False: the default heuristic stops matching "popular"
    # characters once the second sequence has 200+ items, which makes long
    # messages with a tiny edit look almost completely rewritten.
    matcher = SequenceMatcher(None, old, new, autojunk=False)
    carets = [" "] * len(new)
    changed_count = 0
    for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue
        span = j2 - j1  # 0 for pure deletions
        carets[j1:j2] = "^" * span
        changed_count += span
    # If most of the string changed, carets are just noise.
    if changed_count > len(new) * 0.6:
        return ""
    return "".join(carets).rstrip()
16131
17132
18133class Comparator :
19- """Cross-reference two primer JSON outputs and iterate over differences."""
134+ """Cross-reference two primer JSON outputs and iterate over differences.
135+
136+ Yields ``(package, missing, new, changed)`` for each package that has at
137+ least one difference. *changed* contains pairs of ``(old, new)`` messages
138+ that are the same diagnostic but with altered details (line, message text,
139+ etc.).
140+ """
20141
21142 def __init__ (self , main_data : PackageMessages , pr_data : PackageMessages ) -> None :
22143 self ._main_data = main_data
23144 self ._pr_data = pr_data
145+ self .commits : dict [str , str ] = {
146+ pkg : data ["commit" ] for pkg , data in pr_data .items ()
147+ }
24148
25149 @staticmethod
26150 def from_json (
@@ -45,28 +169,29 @@ def from_json(
45169 )
46170 return Comparator (main_data , pr_data )
47171
def __iter__(self) -> Iterator[PackageDiff]:
    """Yield ``(package, missing, new, changed)`` for each differing package.

    Only packages present in the main data are visited; each one is looked
    up in the PR data by name, so a package absent there raises
    ``KeyError``.  Packages with no difference at all are skipped.
    """
    for package, main_entry in self._main_data.items():
        # Work on a copy so the stored PR data is left untouched.
        remaining_new = list(self._pr_data[package]["messages"])

        # First pass: cancel out messages that are identical on both sides.
        unmatched_old: list[JSONMessage] = []
        for message in main_entry["messages"]:
            if message in remaining_new:
                remaining_new.remove(message)
            else:
                unmatched_old.append(message)

        # Second pass: pair the left-overs by position to detect *changed*
        # messages (same diagnostic, different line or text).
        changed, missing, added = _match_residuals(unmatched_old, remaining_new)

        if missing or added or changed:
            yield package, missing, added, changed
70195
71196 @staticmethod
72197 def _load_json (file_path : Path | str ) -> PackageMessages :
0 commit comments