Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whatsnew/fragments/10880.internal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``add_message`` now accepts optional ``module`` and ``filepath`` keyword arguments to override the reported message location.

Refs #10880
3 changes: 3 additions & 0 deletions doc/whatsnew/fragments/2368.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``duplicate-code`` messages are now attributed to a module that actually contains the duplicated lines, instead of whichever module happened to be checked last.

Closes #2368
14 changes: 13 additions & 1 deletion pylint/checkers/base_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def get_full_documentation(
result += "\n"
return result

# pylint: disable-next=too-many-arguments
def add_message(
self,
msgid: str,
Expand All @@ -149,9 +150,20 @@ def add_message(
col_offset: int | None = None,
end_lineno: int | None = None,
end_col_offset: int | None = None,
module: str | None = None,
filepath: str | None = None,
) -> None:
self.linter.add_message(
msgid, line, node, args, confidence, col_offset, end_lineno, end_col_offset
msgid,
line=line,
node=node,
args=args,
confidence=confidence,
col_offset=col_offset,
end_lineno=end_lineno,
end_col_offset=end_col_offset,
module=module,
filepath=filepath,
)

def check_consistency(self) -> None:
Expand Down
13 changes: 12 additions & 1 deletion pylint/checkers/symilar.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,10 +812,12 @@ def __init__(self, linter: PyLinter) -> None:
ignore_imports=self.linter.config.ignore_imports,
ignore_signatures=self.linter.config.ignore_signatures,
)
self._module_filepaths: dict[str, str | None] = {}

def open(self) -> None:
"""Init the checkers: reset linesets and statistics information."""
self.linesets = []
self._module_filepaths.clear()
self.linter.stats.reset_duplicated_lines()

def process_module(self, node: nodes.Module) -> None:
Expand All @@ -835,6 +837,7 @@ def process_module(self, node: nodes.Module) -> None:
DeprecationWarning,
stacklevel=2,
)
self._module_filepaths[self.linter.current_name] = self.linter.current_file
with node.stream() as stream:
self.append_stream(self.linter.current_name, stream, node.file_encoding)

Expand All @@ -854,7 +857,15 @@ def close(self) -> None:
for line in lineset.real_lines[start_line:end_line]:
msg.append(line.rstrip())

self.add_message("R0801", args=(len(couples), "\n".join(msg)))
# Attribute the message to the first module in alphabetical order rather
# than the last-checked module which may be unrelated (see #2368).
first_module = min(c[0].name for c in couples)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the min do here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure we're getting the module with the lower alphabetical value, so the result is deterministic. We're only raising one message per couples.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps add this in the comment?

# Attribute the message to the first involved module rather than
# the last-checked module which may be unrelated (see #2368).

Does not really convey this to me

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.add_message(
"R0801",
args=(len(couples), "\n".join(msg)),
module=first_module,
filepath=self._module_filepaths.get(first_module),
)
duplicated += num * (len(couples) - 1)
stats.nb_duplicated_lines += int(duplicated)
stats.percent_duplicated_lines += float(total and duplicated * 100.0 / total)
Expand Down
21 changes: 18 additions & 3 deletions pylint/lint/pylinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,7 @@ def _report_evaluation(self, verbose: bool = False) -> int | None:
self.reporter.display_reports(sect)
return note

# pylint: disable-next=too-many-arguments
def _add_one_message(
self,
message_definition: MessageDefinition,
Expand All @@ -1229,6 +1230,8 @@ def _add_one_message(
col_offset: int | None,
end_lineno: int | None,
end_col_offset: int | None,
module: str | None = None,
filepath: str | None = None,
) -> None:
"""After various checks have passed a single Message is
passed to the reporter and added to stats.
Expand Down Expand Up @@ -1282,11 +1285,15 @@ def _add_one_message(
msg %= args
# get module and object
if node is None:
module, obj = self.current_name, ""
_module, obj = self.current_name, ""
abspath = self.current_file
else:
module, obj = utils.get_module_and_frameid(node)
_module, obj = utils.get_module_and_frameid(node)
abspath = node.root().file
if module is not None:
_module = module
if filepath is not None:
abspath = filepath
if abspath is not None:
path = abspath.replace(self.reporter.path_strip_prefix, "", 1)
else:
Expand All @@ -1299,7 +1306,7 @@ def _add_one_message(
MessageLocationTuple(
abspath or "",
path,
module or "",
_module or "",
obj,
line or 1,
col_offset or 0,
Expand All @@ -1311,6 +1318,7 @@ def _add_one_message(
)
)

# pylint: disable-next=too-many-arguments
def add_message(
self,
msgid: str,
Expand All @@ -1321,6 +1329,8 @@ def add_message(
col_offset: int | None = None,
end_lineno: int | None = None,
end_col_offset: int | None = None,
module: str | None = None,
filepath: str | None = None,
) -> None:
"""Adds a message given by ID or name.

Expand All @@ -1329,6 +1339,9 @@ def add_message(
AST checkers must provide the node argument (but may optionally
provide line if the line number is different), raw and token checkers
must provide the line argument.

The module and filepath parameters allow overriding the module name
and file path reported in the message location.
"""
if confidence is None:
confidence = interfaces.UNDEFINED
Expand All @@ -1343,6 +1356,8 @@ def add_message(
col_offset,
end_lineno,
end_col_offset,
module=module,
filepath=filepath,
)

def add_ignored_message(
Expand Down
3 changes: 3 additions & 0 deletions pylint/testutils/unittest_linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def release_messages(self) -> list[MessageTest]:
finally:
self._messages = []

# pylint: disable-next=too-many-arguments
def add_message(
self,
msgid: str,
Expand All @@ -39,6 +40,8 @@ def add_message(
col_offset: int | None = None,
end_lineno: int | None = None,
end_col_offset: int | None = None,
module: str | None = None,
filepath: str | None = None,
) -> None:
"""Add a MessageTest to the _messages attribute of the linter class."""
# If confidence is None we set it to UNDEFINED as well in PyLinter
Expand Down
Empty file.
14 changes: 14 additions & 0 deletions tests/regrtest_data/duplicate_code/2368/databaselib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Database utilities."""


def setup_connection(host, port, database):
"""Set up a database connection with retries."""
retries = 3
timeout = 30
connection_string = f"{host}:{port}/{database}"
print(f"Connecting to {connection_string}")
for attempt in range(retries):
print(f"Attempt {attempt + 1} of {retries}")
if attempt > 0:
print(f"Waiting {timeout} seconds before retry")
return connection_string
14 changes: 14 additions & 0 deletions tests/regrtest_data/duplicate_code/2368/projectlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Project utilities."""


def setup_connection(host, port, database):
"""Set up a database connection with retries."""
retries = 3
timeout = 30
connection_string = f"{host}:{port}/{database}"
print(f"Connecting to {connection_string}")
for attempt in range(retries):
print(f"Attempt {attempt + 1} of {retries}")
if attempt > 0:
print(f"Waiting {timeout} seconds before retry")
return connection_string
11 changes: 11 additions & 0 deletions tests/regrtest_data/duplicate_code/2368/xmlgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""XML generation β€” no duplicate code here."""
import xml.etree.ElementTree as ET


def generate_xml(data):
"""Generate an XML document from data."""
root = ET.Element("root")
for key, value in data.items():
child = ET.SubElement(root, key)
child.text = str(value)
return ET.tostring(root, encoding="unicode")
36 changes: 36 additions & 0 deletions tests/test_similar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pylint.reporters.text import TextReporter
from pylint.testutils._run import _Run as Run
from pylint.testutils.reporter_for_tests import GenericTestReporter
from pylint.testutils.utils import _patch_streams

HERE = abspath(dirname(__file__))
Expand Down Expand Up @@ -266,6 +267,41 @@ def test_useless_suppression() -> None:
)
assert not runner.linter.stats.by_msg

@staticmethod
def test_duplicate_code_module_attribution() -> None:
"""R0801 should be attributed to a module with duplicates, not the last-checked one.

Regression test for https://github.com/pylint-dev/pylint/issues/2368
"""
path = join(DATA, "2368")
reporter = GenericTestReporter()
# Pass files explicitly with the unrelated module last, so that current_name
# points to a file without duplication when close() fires.
Run(
[
join(path, "databaselib.py"),
join(path, "projectlib.py"),
join(path, "xmlgen.py"),
"--disable=all",
"--enable=duplicate-code",
"--min-similarity-lines=4",
],
reporter=reporter,
exit=False,
)
messages = reporter.messages
assert messages, "Expected at least one R0801 message"
for msg in messages:
assert msg.symbol == "duplicate-code"
# The message text contains "=={module_name}:[start:end]" for each
# involved module. The message's own .module must be one of them,
# but not the unrelated module.
involved = re.findall(r"==(\S+?):\[", msg.msg)
assert msg.module in involved, (
f"R0801 attributed to {msg.module!r} which is not in "
f"the involved modules {involved}"
)

def test_conditional_imports(self) -> None:
"""Tests enabling ignore-imports with conditional imports works correctly."""
path = join(DATA, "ignore_conditional_imports")
Expand Down
Loading