diff --git a/doc/whatsnew/fragments/10880.internal b/doc/whatsnew/fragments/10880.internal new file mode 100644 index 0000000000..a6185c03a7 --- /dev/null +++ b/doc/whatsnew/fragments/10880.internal @@ -0,0 +1,3 @@ +``add_message`` now accepts optional ``module`` and ``filepath`` keyword arguments to override the reported message location. + +Refs #10880 diff --git a/doc/whatsnew/fragments/2368.bugfix b/doc/whatsnew/fragments/2368.bugfix new file mode 100644 index 0000000000..f620723e55 --- /dev/null +++ b/doc/whatsnew/fragments/2368.bugfix @@ -0,0 +1,3 @@ +``duplicate-code`` messages are now attributed to a module that actually contains the duplicated lines, instead of whichever module happened to be checked last. + +Closes #2368 diff --git a/pylint/checkers/base_checker.py b/pylint/checkers/base_checker.py index a6eefca386..e54c214737 100644 --- a/pylint/checkers/base_checker.py +++ b/pylint/checkers/base_checker.py @@ -139,6 +139,7 @@ def get_full_documentation( result += "\n" return result + # pylint: disable-next=too-many-arguments def add_message( self, msgid: str, @@ -149,9 +150,20 @@ def add_message( col_offset: int | None = None, end_lineno: int | None = None, end_col_offset: int | None = None, + module: str | None = None, + filepath: str | None = None, ) -> None: self.linter.add_message( - msgid, line, node, args, confidence, col_offset, end_lineno, end_col_offset + msgid, + line=line, + node=node, + args=args, + confidence=confidence, + col_offset=col_offset, + end_lineno=end_lineno, + end_col_offset=end_col_offset, + module=module, + filepath=filepath, ) def check_consistency(self) -> None: diff --git a/pylint/checkers/symilar.py b/pylint/checkers/symilar.py index 8d504f3364..aeacf81ba6 100644 --- a/pylint/checkers/symilar.py +++ b/pylint/checkers/symilar.py @@ -812,10 +812,12 @@ def __init__(self, linter: PyLinter) -> None: ignore_imports=self.linter.config.ignore_imports, ignore_signatures=self.linter.config.ignore_signatures, ) + self._module_filepaths: dict[str, str | None] = {} def open(self) -> None: """Init the checkers: reset linesets and statistics information.""" self.linesets = [] + self._module_filepaths.clear() self.linter.stats.reset_duplicated_lines() def process_module(self, node: nodes.Module) -> None: @@ -835,6 +837,7 @@ def process_module(self, node: nodes.Module) -> None: DeprecationWarning, stacklevel=2, ) + self._module_filepaths[self.linter.current_name] = self.linter.current_file with node.stream() as stream: self.append_stream(self.linter.current_name, stream, node.file_encoding) @@ -854,7 +857,15 @@ def close(self) -> None: for line in lineset.real_lines[start_line:end_line]: msg.append(line.rstrip()) - self.add_message("R0801", args=(len(couples), "\n".join(msg))) + # Attribute the message to the first module in alphabetical order rather + # than the last-checked module which may be unrelated (see #2368). + first_module = min(c[0].name for c in couples) + self.add_message( + "R0801", + args=(len(couples), "\n".join(msg)), + module=first_module, + filepath=self._module_filepaths.get(first_module), + ) duplicated += num * (len(couples) - 1) stats.nb_duplicated_lines += int(duplicated) stats.percent_duplicated_lines += float(total and duplicated * 100.0 / total) diff --git a/pylint/lint/pylinter.py b/pylint/lint/pylinter.py index 088e9c15c0..d5c386d919 100644 --- a/pylint/lint/pylinter.py +++ b/pylint/lint/pylinter.py @@ -1219,6 +1219,7 @@ def _report_evaluation(self, verbose: bool = False) -> int | None: self.reporter.display_reports(sect) return note + # pylint: disable-next=too-many-arguments def _add_one_message( self, message_definition: MessageDefinition, @@ -1229,6 +1230,8 @@ def _add_one_message( col_offset: int | None, end_lineno: int | None, end_col_offset: int | None, + module: str | None = None, + filepath: str | None = None, ) -> None: """After various checks have passed a single Message is passed to the reporter and added to stats. @@ -1282,11 +1285,15 @@ def _add_one_message( msg %= args # get module and object if node is None: - module, obj = self.current_name, "" + _module, obj = self.current_name, "" abspath = self.current_file else: - module, obj = utils.get_module_and_frameid(node) + _module, obj = utils.get_module_and_frameid(node) abspath = node.root().file + if module is not None: + _module = module + if filepath is not None: + abspath = filepath if abspath is not None: path = abspath.replace(self.reporter.path_strip_prefix, "", 1) else: @@ -1299,7 +1306,7 @@ def _add_one_message( MessageLocationTuple( abspath or "", path, - module or "", + _module or "", obj, line or 1, col_offset or 0, @@ -1311,6 +1318,7 @@ def _add_one_message( ) ) + # pylint: disable-next=too-many-arguments def add_message( self, msgid: str, @@ -1321,6 +1329,8 @@ def add_message( col_offset: int | None = None, end_lineno: int | None = None, end_col_offset: int | None = None, + module: str | None = None, + filepath: str | None = None, ) -> None: """Adds a message given by ID or name. @@ -1329,6 +1339,9 @@ def add_message( AST checkers must provide the node argument (but may optionally provide line if the line number is different), raw and token checkers must provide the line argument. + + The module and filepath parameters allow overriding the module name + and file path reported in the message location. """ if confidence is None: confidence = interfaces.UNDEFINED @@ -1343,6 +1356,8 @@ def add_message( col_offset, end_lineno, end_col_offset, + module=module, + filepath=filepath, ) def add_ignored_message( diff --git a/pylint/testutils/unittest_linter.py b/pylint/testutils/unittest_linter.py index a19afec568..eb206a4aae 100644 --- a/pylint/testutils/unittest_linter.py +++ b/pylint/testutils/unittest_linter.py @@ -28,6 +28,7 @@ def release_messages(self) -> list[MessageTest]: finally: self._messages = [] + # pylint: disable-next=too-many-arguments def add_message( self, msgid: str, @@ -39,6 +40,8 @@ def add_message( col_offset: int | None = None, end_lineno: int | None = None, end_col_offset: int | None = None, + module: str | None = None, + filepath: str | None = None, ) -> None: """Add a MessageTest to the _messages attribute of the linter class.""" # If confidence is None we set it to UNDEFINED as well in PyLinter diff --git a/tests/regrtest_data/duplicate_code/2368/__init__.py b/tests/regrtest_data/duplicate_code/2368/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/regrtest_data/duplicate_code/2368/databaselib.py b/tests/regrtest_data/duplicate_code/2368/databaselib.py new file mode 100644 index 0000000000..29e59cf5fb --- /dev/null +++ b/tests/regrtest_data/duplicate_code/2368/databaselib.py @@ -0,0 +1,14 @@ +"""Database utilities.""" + + +def setup_connection(host, port, database): + """Set up a database connection with retries.""" + retries = 3 + timeout = 30 + connection_string = f"{host}:{port}/{database}" + print(f"Connecting to {connection_string}") + for attempt in range(retries): + print(f"Attempt {attempt + 1} of {retries}") + if attempt > 0: + print(f"Waiting {timeout} seconds before retry") + return connection_string diff --git a/tests/regrtest_data/duplicate_code/2368/projectlib.py b/tests/regrtest_data/duplicate_code/2368/projectlib.py new file mode 100644 index 0000000000..6a613a3d5b --- /dev/null +++ b/tests/regrtest_data/duplicate_code/2368/projectlib.py @@ -0,0 +1,14 @@ +"""Project utilities.""" + + +def setup_connection(host, port, database): + """Set up a database connection with retries.""" + retries = 3 + timeout = 30 + connection_string = f"{host}:{port}/{database}" + print(f"Connecting to {connection_string}") + for attempt in range(retries): + print(f"Attempt {attempt + 1} of {retries}") + if attempt > 0: + print(f"Waiting {timeout} seconds before retry") + return connection_string diff --git a/tests/regrtest_data/duplicate_code/2368/xmlgen.py b/tests/regrtest_data/duplicate_code/2368/xmlgen.py new file mode 100644 index 0000000000..3dcc4b14e8 --- /dev/null +++ b/tests/regrtest_data/duplicate_code/2368/xmlgen.py @@ -0,0 +1,11 @@ +"""XML generation — no duplicate code here.""" +import xml.etree.ElementTree as ET + + +def generate_xml(data): + """Generate an XML document from data.""" + root = ET.Element("root") + for key, value in data.items(): + child = ET.SubElement(root, key) + child.text = str(value) + return ET.tostring(root, encoding="unicode") diff --git a/tests/test_similar.py b/tests/test_similar.py index c5357f4b6d..c5f334bba9 100644 --- a/tests/test_similar.py +++ b/tests/test_similar.py @@ -15,6 +15,7 @@ from pylint.reporters.text import TextReporter from pylint.testutils._run import _Run as Run +from pylint.testutils.reporter_for_tests import GenericTestReporter from pylint.testutils.utils import _patch_streams HERE = abspath(dirname(__file__)) @@ -266,6 +267,41 @@ def test_useless_suppression() -> None: ) assert not runner.linter.stats.by_msg + @staticmethod + def test_duplicate_code_module_attribution() -> None: + """R0801 should be attributed to a module with duplicates, not the last-checked one. + + Regression test for https://github.com/pylint-dev/pylint/issues/2368 + """ + path = join(DATA, "2368") + reporter = GenericTestReporter() + # Pass files explicitly with the unrelated module last, so that current_name + # points to a file without duplication when close() fires. + Run( + [ + join(path, "databaselib.py"), + join(path, "projectlib.py"), + join(path, "xmlgen.py"), + "--disable=all", + "--enable=duplicate-code", + "--min-similarity-lines=4", + ], + reporter=reporter, + exit=False, + ) + messages = reporter.messages + assert messages, "Expected at least one R0801 message" + for msg in messages: + assert msg.symbol == "duplicate-code" + # The message text contains "=={module_name}:[start:end]" for each + # involved module. The message's own .module must be one of them, + # but not the unrelated module. + involved = re.findall(r"==(\S+?):\[", msg.msg) + assert msg.module in involved, ( + f"R0801 attributed to {msg.module!r} which is not in " + f"the involved modules {involved}" + ) + def test_conditional_imports(self) -> None: """Tests enabling ignore-imports with conditional imports works correctly.""" path = join(DATA, "ignore_conditional_imports")