Skip to content

Commit 12b0468

Browse files
Attribute duplicate-code messages to involved modules, not the last-checked one
R0801 messages emitted in SimilaritiesChecker.close() were attributed to whichever module happened to be checked last, because add_message() without a node falls back to linter.current_name. Now the checker saves a module→filepath mapping during process_module() and sets the correct module context before each add_message() call. Closes #2368 Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 156158d commit 12b0468

3 files changed

Lines changed: 51 additions & 0 deletions

File tree

doc/whatsnew/fragments/2368.bugfix

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
``duplicate-code`` messages are now attributed to a module that actually contains the duplicated lines, instead of whichever module happened to be checked last.
2+
3+
Closes #2368

pylint/checkers/symilar.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,10 +812,12 @@ def __init__(self, linter: PyLinter) -> None:
812812
ignore_imports=self.linter.config.ignore_imports,
813813
ignore_signatures=self.linter.config.ignore_signatures,
814814
)
815+
self._module_filepaths: dict[str, str | None] = {}
815816

816817
def open(self) -> None:
817818
"""Init the checkers: reset linesets and statistics information."""
818819
self.linesets = []
820+
self._module_filepaths.clear()
819821
self.linter.stats.reset_duplicated_lines()
820822

821823
def process_module(self, node: nodes.Module) -> None:
@@ -835,6 +837,7 @@ def process_module(self, node: nodes.Module) -> None:
835837
DeprecationWarning,
836838
stacklevel=2,
837839
)
840+
self._module_filepaths[self.linter.current_name] = self.linter.current_file
838841
with node.stream() as stream:
839842
self.append_stream(self.linter.current_name, stream, node.file_encoding)
840843

@@ -843,6 +846,8 @@ def close(self) -> None:
843846
total = sum(len(lineset) for lineset in self.linesets)
844847
duplicated = 0
845848
stats = self.linter.stats
849+
original_name = self.linter.current_name
850+
original_file = self.linter.current_file
846851
for num, couples in self._compute_sims():
847852
msg = []
848853
lineset = start_line = end_line = None
@@ -854,8 +859,15 @@ def close(self) -> None:
854859
for line in lineset.real_lines[start_line:end_line]:
855860
msg.append(line.rstrip())
856861

862+
# Attribute the message to the first involved module rather than
863+
# the last-checked module which may be unrelated (see #2368).
864+
first_module = min(c[0].name for c in couples)
865+
self.linter.current_name = first_module
866+
self.linter.current_file = self._module_filepaths.get(first_module)
857867
self.add_message("R0801", args=(len(couples), "\n".join(msg)))
858868
duplicated += num * (len(couples) - 1)
869+
self.linter.current_name = original_name
870+
self.linter.current_file = original_file
859871
stats.nb_duplicated_lines += int(duplicated)
860872
stats.percent_duplicated_lines += float(total and duplicated * 100.0 / total)
861873

tests/test_similar.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from pylint.reporters.text import TextReporter
1717
from pylint.testutils._run import _Run as Run
18+
from pylint.testutils.reporter_for_tests import GenericTestReporter
1819
from pylint.testutils.utils import _patch_streams
1920

2021
HERE = abspath(dirname(__file__))
@@ -266,6 +267,41 @@ def test_useless_suppression() -> None:
266267
)
267268
assert not runner.linter.stats.by_msg
268269

270+
@staticmethod
271+
def test_duplicate_code_module_attribution() -> None:
272+
"""R0801 should be attributed to a module with duplicates, not the last-checked one.
273+
274+
Regression test for https://github.com/pylint-dev/pylint/issues/2368
275+
"""
276+
path = join(DATA, "2368")
277+
reporter = GenericTestReporter()
278+
# Pass files explicitly with the unrelated module last, so that current_name
279+
# points to a file without duplication when close() fires.
280+
Run(
281+
[
282+
join(path, "databaselib.py"),
283+
join(path, "projectlib.py"),
284+
join(path, "xmlgen.py"),
285+
"--disable=all",
286+
"--enable=duplicate-code",
287+
"--min-similarity-lines=4",
288+
],
289+
reporter=reporter,
290+
exit=False,
291+
)
292+
messages = reporter.messages
293+
assert messages, "Expected at least one R0801 message"
294+
for msg in messages:
295+
assert msg.symbol == "duplicate-code"
296+
# The message text contains "=={module_name}:[start:end]" for each
297+
# involved module. The message's own .module must be one of them,
298+
# but not the unrelated module.
299+
involved = re.findall(r"==(\S+?):\[", msg.msg)
300+
assert msg.module in involved, (
301+
f"R0801 attributed to {msg.module!r} which is not in "
302+
f"the involved modules {involved}"
303+
)
304+
269305
def test_conditional_imports(self) -> None:
270306
"""Tests enabling ignore-imports with conditional imports works correctly."""
271307
path = join(DATA, "ignore_conditional_imports")

0 commit comments

Comments
 (0)