From 88144842f9b1b5d62b291bca4d0ef6e858e8eca8 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 16 Mar 2026 08:22:18 +0100 Subject: [PATCH] Bugfix and refactor deep_merge The previous implementation didn't deep-copy values. The current implementation fixes this and adds additional tests to cover also tuples and sets. Test cases amended to cover also tests for truely deep copies. Test coverage for test_deep_merge.py is 100% --- changelog.d/202.refactor.rst | 4 + src/docbuild/config/merge.py | 63 ++++++++----- tests/config/app/test_deep_merge.py | 139 ++++++++++++++++++++++++++-- 3 files changed, 178 insertions(+), 28 deletions(-) create mode 100644 changelog.d/202.refactor.rst diff --git a/changelog.d/202.refactor.rst b/changelog.d/202.refactor.rst new file mode 100644 index 00000000..396ecb6a --- /dev/null +++ b/changelog.d/202.refactor.rst @@ -0,0 +1,4 @@ +The implementation of :func:`~docbuild.config.app.deep_merge` has been +fixed to properly handle deep copies of lists, tuples, and sets. +Refactored to be more robust and handle a wider variety of data types +(including lists, tuples, and sets) correctly during the merging process. diff --git a/src/docbuild/config/merge.py b/src/docbuild/config/merge.py index 27caafd2..8addb1c9 100644 --- a/src/docbuild/config/merge.py +++ b/src/docbuild/config/merge.py @@ -1,23 +1,22 @@ """Merge multiple dictionaries into a new one without modifying inputs.""" from collections.abc import Mapping +from copy import deepcopy from typing import Any -def deep_merge( - # dct1: , - *dcts: dict[str, Any], -) -> dict[str, Any]: +def deep_merge(*dcts: Mapping[str, Any]) -> dict[str, Any]: """Merge multiple dictionaries into a new one without modifying inputs. Make a deep copy of the first dictionary and then update the copy with the subsequent dictionaries: - * If a key exists in both dictionaries, the value from the last dictionary - will overwrite the previous one. - * If the value is a list, it will concatenate the lists. - * If the value is a primitive type, it will overwrite the value. - * If a key exists in multiple dictionaries, the last one will take precedence. + * If a key exists in both dictionaries and both values are Mappings, + they will be merged iteratively. + * If both values are lists or tuples, they will be concatenated. + * If both values are sets, they will be unioned. + * Otherwise (different types or primitive values), the value from the + subsequent dictionary will overwrite the previous one. This means that the order of dictionaries matters. The first dictionary will be the base, and the subsequent dictionaries will update it. @@ -29,22 +28,42 @@ def deep_merge( if not dcts: return {} - # Start with a shallow copy of the first dictionary - merged = dcts[0].copy() + result = deepcopy(dict(dcts[0])) - for d in dcts[1:]: - stack = [(merged, d)] + for src in dcts[1:]: + stack: list[tuple[dict[str, Any], Mapping[str, Any]]] = [(result, src)] while stack: - d1, d2 = stack.pop() - for key, value in d2.items(): - if ( - key in merged - and isinstance(d1[key], dict | Mapping) - and isinstance(value, dict | Mapping) + dest, current = stack.pop() + + for key, value in current.items(): + if key not in dest: + dest[key] = deepcopy(value) + continue + + existing = dest[key] + + # Mapping → merge deeper + if isinstance(existing, Mapping) and isinstance(value, Mapping): + # Ensure we are working with a dict for the merge target + # This avoids nested specialized types if they support item assignment + if type(existing) is not dict: + existing = dict(existing) + dest[key] = existing + stack.append((existing, value)) + + # Lists / tuples → concatenate + elif (isinstance(existing, list) and isinstance(value, list)) or ( + isinstance(existing, tuple) and isinstance(value, tuple) ): - stack.append((d1[key], value)) + dest[key] = existing + deepcopy(value) # type: ignore[operator] + + # Sets → union + elif isinstance(existing, set) and isinstance(value, set): + dest[key] = existing | deepcopy(value) + + # Fallback → overwrite else: - d1[key] = value + dest[key] = deepcopy(value) - return merged + return result diff --git a/tests/config/app/test_deep_merge.py b/tests/config/app/test_deep_merge.py index 6f63f97c..93bc8db6 100644 --- a/tests/config/app/test_deep_merge.py +++ b/tests/config/app/test_deep_merge.py @@ -6,17 +6,144 @@ @pytest.mark.parametrize( "thedicts,expected", [ - # 1 - ([{"a": 1}, {"a": 2}], {"a": 2}), - # 2 - ([{"a": 1, "b": 2}, {"a": 2}], {"a": 2, "b": 2}), - # 3 - ( + # + pytest.param([], {}, id="empty"), + # + pytest.param([{"a": 1}, {"a": 2}], {"a": 2}, id="overwrite_scalar"), + # + pytest.param( + [{"a": 1, "b": 2}, {"a": 2}], + {"a": 2, "b": 2}, + id="merge_keys_overwrite_scalar", + ), + # + pytest.param( [{"db": {"host": "localhost", "port": 1234}}, {"db": {"port": 5432}}], {"db": {"host": "localhost", "port": 5432}}, + id="nested_dicts_recursive", + ), + # + pytest.param( + [{"a": [1, 2]}, {"a": [3, 4]}], + {"a": [1, 2, 3, 4]}, + id="list_concatenation", + ), + # + pytest.param( + [{"t": (1, 2)}, {"t": (3, 4)}], + {"t": (1, 2, 3, 4)}, + id="tuple_concatenation", + ), + # + pytest.param( + [{"a": [1, 2]}, {"a": 3}], + {"a": 3}, + id="overwrite_list_with_scalar", ), # + pytest.param( + [{"a": 1}, {"a": [2, 3]}], + {"a": [2, 3]}, + id="overwrite_scalar_with_list", + ), + # + pytest.param( + [{"x": {"l": ["a"]}}, {"x": {"l": ["b"]}}], + {"x": {"l": ["a", "b"]}}, + id="nested_list_concatenation", + ), + # + pytest.param( + [{"a": [{"x": 1}]}, {"a": [{"y": 2}]}], + {"a": [{"x": 1}, {"y": 2}]}, + id="list_of_dicts_concatenation", + ), + # + pytest.param( + [{"s": {1, 2}}, {"s": {3, 4}}], + {"s": {1, 2, 3, 4}}, + id="set_union", + ), + # + pytest.param( + [{"s": {1, 2}}, {"s": {2, 3}}], + {"s": {1, 2, 3}}, + id="set_union_overlap", + ), ], ) def test_deep_merge(thedicts, expected): assert deep_merge(*thedicts) == expected + + +def test_deep_merge_is_truly_deep(): + d1 = {"nested": [1, 2], "deep": {"a": 1}} + d2 = {} + merged = deep_merge(d1, d2) + + # Modify result + merged["nested"].append(3) + merged["deep"]["b"] = 2 + + # Assert original is untouched + assert d1["nested"] == [1, 2] + assert "b" not in d1["deep"] + + +def test_three_way_merge(): + d1 = {"a": 1} + d2 = {"b": 2} + d3 = {"c": 3} + assert deep_merge(d1, d2, d3) == {"a": 1, "b": 2, "c": 3} + + +def test_deep_merge_inputs_remain_independent(): + # Verify that subsequent dictionaries (not just the first) are also treated as immutable sources + d1 = {} + d2 = {"a": [1, 2], "b": {"x": 1}} + merged = deep_merge(d1, d2) + + # Modify result + merged["a"].append(3) + merged["b"]["y"] = 2 + + # Assert input d2 is untouched + assert d2["a"] == [1, 2] + assert "y" not in d2["b"] + + +def test_deep_merge_return_type(): + class MyMap(dict): + pass + + m1 = MyMap({"a": 1}) + m2 = {"b": 2} + result = deep_merge(m1, m2) + + assert isinstance(result, dict) + assert not isinstance(result, MyMap) + assert result == {"a": 1, "b": 2} + + +def test_deep_merge_converts_nested_custom_mapping_to_dict(): + class MyMap(dict): + pass + + # MyMap instance nested inside a standard dict + d1 = {"nested": MyMap({"a": 1})} + d2 = {"nested": {"b": 2}} + + result = deep_merge(d1, d2) + + # Check that merge happened correctly + assert result == {"nested": {"a": 1, "b": 2}} + + # Check that the nested dictionary is a plain dict, not MyMap + # This verifies that the line `existing = dict(existing)` was executed + # and the result contains the converted dict. + assert type(result["nested"]) is dict + assert not isinstance(result["nested"], MyMap) + + # Verify original d1 is untouched + assert isinstance(d1["nested"], MyMap) + assert d1["nested"] == {"a": 1}