Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog.d/202.refactor.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The implementation of :func:`~docbuild.config.app.deep_merge` has been
fixed to properly handle deep copies of lists, tuples, and sets.
Refactored to be more robust and handle a wider variety of data types
(including lists, tuples, and sets) correctly during the merging process.
63 changes: 41 additions & 22 deletions src/docbuild/config/merge.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
"""Merge multiple dictionaries into a new one without modifying inputs."""

from collections.abc import Mapping
from copy import deepcopy
from typing import Any


def deep_merge(
# dct1: ,
*dcts: dict[str, Any],
) -> dict[str, Any]:
def deep_merge(*dcts: Mapping[str, Any]) -> dict[str, Any]:
"""Merge multiple dictionaries into a new one without modifying inputs.

Make a deep copy of the first dictionary and then update the copy with the
subsequent dictionaries:

* If a key exists in both dictionaries, the value from the last dictionary
will overwrite the previous one.
* If the value is a list, it will concatenate the lists.
* If the value is a primitive type, it will overwrite the value.
* If a key exists in multiple dictionaries, the last one will take precedence.
* If a key exists in both dictionaries and both values are Mappings,
they will be merged iteratively.
* If both values are lists or tuples, they will be concatenated.
* If both values are sets, they will be unioned.
* Otherwise (different types or primitive values), the value from the
subsequent dictionary will overwrite the previous one.

This means that the order of dictionaries matters. The first dictionary
will be the base, and the subsequent dictionaries will update it.
Expand All @@ -29,22 +28,42 @@ def deep_merge(
if not dcts:
return {}

# Start with a shallow copy of the first dictionary
merged = dcts[0].copy()
result = deepcopy(dict(dcts[0]))

for d in dcts[1:]:
stack = [(merged, d)]
for src in dcts[1:]:
stack: list[tuple[dict[str, Any], Mapping[str, Any]]] = [(result, src)]

while stack:
d1, d2 = stack.pop()
for key, value in d2.items():
if (
key in merged
and isinstance(d1[key], dict | Mapping)
and isinstance(value, dict | Mapping)
dest, current = stack.pop()

for key, value in current.items():
if key not in dest:
dest[key] = deepcopy(value)
continue

existing = dest[key]

# Mapping → merge deeper
if isinstance(existing, Mapping) and isinstance(value, Mapping):
# Ensure we are working with a dict for the merge target
# This avoids nested specialized types if they support item assignment
if type(existing) is not dict:
existing = dict(existing)
dest[key] = existing
stack.append((existing, value))

# Lists / tuples → concatenate
elif (isinstance(existing, list) and isinstance(value, list)) or (
isinstance(existing, tuple) and isinstance(value, tuple)
):
stack.append((d1[key], value))
dest[key] = existing + deepcopy(value) # type: ignore[operator]

# Sets → union
elif isinstance(existing, set) and isinstance(value, set):
dest[key] = existing | deepcopy(value)

# Fallback → overwrite
else:
d1[key] = value
dest[key] = deepcopy(value)

return merged
return result
139 changes: 133 additions & 6 deletions tests/config/app/test_deep_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,144 @@
@pytest.mark.parametrize(
"thedicts,expected",
[
# 1
([{"a": 1}, {"a": 2}], {"a": 2}),
# 2
([{"a": 1, "b": 2}, {"a": 2}], {"a": 2, "b": 2}),
# 3
(
#
pytest.param([], {}, id="empty"),
#
pytest.param([{"a": 1}, {"a": 2}], {"a": 2}, id="overwrite_scalar"),
#
pytest.param(
[{"a": 1, "b": 2}, {"a": 2}],
{"a": 2, "b": 2},
id="merge_keys_overwrite_scalar",
),
#
pytest.param(
[{"db": {"host": "localhost", "port": 1234}}, {"db": {"port": 5432}}],
{"db": {"host": "localhost", "port": 5432}},
id="nested_dicts_recursive",
),
#
pytest.param(
[{"a": [1, 2]}, {"a": [3, 4]}],
{"a": [1, 2, 3, 4]},
id="list_concatenation",
),
#
pytest.param(
[{"t": (1, 2)}, {"t": (3, 4)}],
{"t": (1, 2, 3, 4)},
id="tuple_concatenation",
),
#
pytest.param(
[{"a": [1, 2]}, {"a": 3}],
{"a": 3},
id="overwrite_list_with_scalar",
),
#
pytest.param(
[{"a": 1}, {"a": [2, 3]}],
{"a": [2, 3]},
id="overwrite_scalar_with_list",
),
#
pytest.param(
[{"x": {"l": ["a"]}}, {"x": {"l": ["b"]}}],
{"x": {"l": ["a", "b"]}},
id="nested_list_concatenation",
),
#
pytest.param(
[{"a": [{"x": 1}]}, {"a": [{"y": 2}]}],
{"a": [{"x": 1}, {"y": 2}]},
id="list_of_dicts_concatenation",
),
#
pytest.param(
[{"s": {1, 2}}, {"s": {3, 4}}],
{"s": {1, 2, 3, 4}},
id="set_union",
),
#
pytest.param(
[{"s": {1, 2}}, {"s": {2, 3}}],
{"s": {1, 2, 3}},
id="set_union_overlap",
),
],
)
def test_deep_merge(thedicts, expected):
assert deep_merge(*thedicts) == expected


def test_deep_merge_is_truly_deep():
d1 = {"nested": [1, 2], "deep": {"a": 1}}
d2 = {}
merged = deep_merge(d1, d2)

# Modify result
merged["nested"].append(3)
merged["deep"]["b"] = 2

# Assert original is untouched
assert d1["nested"] == [1, 2]
assert "b" not in d1["deep"]


def test_three_way_merge():
d1 = {"a": 1}
d2 = {"b": 2}
d3 = {"c": 3}
assert deep_merge(d1, d2, d3) == {"a": 1, "b": 2, "c": 3}


def test_deep_merge_inputs_remain_independent():
# Verify that subsequent dictionaries (not just the first) are also treated as immutable sources
d1 = {}
d2 = {"a": [1, 2], "b": {"x": 1}}
merged = deep_merge(d1, d2)

# Modify result
merged["a"].append(3)
merged["b"]["y"] = 2

# Assert input d2 is untouched
assert d2["a"] == [1, 2]
assert "y" not in d2["b"]


def test_deep_merge_return_type():
class MyMap(dict):
pass

m1 = MyMap({"a": 1})
m2 = {"b": 2}
result = deep_merge(m1, m2)

assert isinstance(result, dict)
assert not isinstance(result, MyMap)
assert result == {"a": 1, "b": 2}


def test_deep_merge_converts_nested_custom_mapping_to_dict():
class MyMap(dict):
pass

# MyMap instance nested inside a standard dict
d1 = {"nested": MyMap({"a": 1})}
d2 = {"nested": {"b": 2}}

result = deep_merge(d1, d2)

# Check that merge happened correctly
assert result == {"nested": {"a": 1, "b": 2}}

# Check that the nested dictionary is a plain dict, not MyMap
# This verifies that the line `existing = dict(existing)` was executed
# and the result contains the converted dict.
assert type(result["nested"]) is dict
assert not isinstance(result["nested"], MyMap)

# Verify original d1 is untouched
assert isinstance(d1["nested"], MyMap)
assert d1["nested"] == {"a": 1}
Loading