Skip to content

Commit 2aba009

Browse files
authored
Bugfix and refactor deep_merge (#202)
The previous implementation didn't deep-copy values. The current implementation fixes this and adds additional tests to cover also tuples and sets. Test cases amended to cover also tests for truely deep copies. Test coverage for test_deep_merge.py is 100%
1 parent 00ac6b8 commit 2aba009

3 files changed

Lines changed: 178 additions & 28 deletions

File tree

changelog.d/202.refactor.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The implementation of :func:`~docbuild.config.app.deep_merge` has been
2+
fixed to properly handle deep copies of lists, tuples, and sets.
3+
Refactored to be more robust and handle a wider variety of data types
4+
(including lists, tuples, and sets) correctly during the merging process.

src/docbuild/config/merge.py

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,22 @@
11
"""Merge multiple dictionaries into a new one without modifying inputs."""
22

33
from collections.abc import Mapping
4+
from copy import deepcopy
45
from typing import Any
56

67

7-
def deep_merge(
8-
# dct1: ,
9-
*dcts: dict[str, Any],
10-
) -> dict[str, Any]:
8+
def deep_merge(*dcts: Mapping[str, Any]) -> dict[str, Any]:
119
"""Merge multiple dictionaries into a new one without modifying inputs.
1210
1311
Make a deep copy of the first dictionary and then update the copy with the
1412
subsequent dictionaries:
1513
16-
* If a key exists in both dictionaries, the value from the last dictionary
17-
will overwrite the previous one.
18-
* If the value is a list, it will concatenate the lists.
19-
* If the value is a primitive type, it will overwrite the value.
20-
* If a key exists in multiple dictionaries, the last one will take precedence.
14+
* If a key exists in both dictionaries and both values are Mappings,
15+
they will be merged iteratively.
16+
* If both values are lists or tuples, they will be concatenated.
17+
* If both values are sets, they will be unioned.
18+
* Otherwise (different types or primitive values), the value from the
19+
subsequent dictionary will overwrite the previous one.
2120
2221
This means that the order of dictionaries matters. The first dictionary
2322
will be the base, and the subsequent dictionaries will update it.
@@ -29,22 +28,42 @@ def deep_merge(
2928
if not dcts:
3029
return {}
3130

32-
# Start with a shallow copy of the first dictionary
33-
merged = dcts[0].copy()
31+
result = deepcopy(dict(dcts[0]))
3432

35-
for d in dcts[1:]:
36-
stack = [(merged, d)]
33+
for src in dcts[1:]:
34+
stack: list[tuple[dict[str, Any], Mapping[str, Any]]] = [(result, src)]
3735

3836
while stack:
39-
d1, d2 = stack.pop()
40-
for key, value in d2.items():
41-
if (
42-
key in merged
43-
and isinstance(d1[key], dict | Mapping)
44-
and isinstance(value, dict | Mapping)
37+
dest, current = stack.pop()
38+
39+
for key, value in current.items():
40+
if key not in dest:
41+
dest[key] = deepcopy(value)
42+
continue
43+
44+
existing = dest[key]
45+
46+
# Mapping → merge deeper
47+
if isinstance(existing, Mapping) and isinstance(value, Mapping):
48+
# Ensure we are working with a dict for the merge target
49+
# This avoids nested specialized types if they support item assignment
50+
if type(existing) is not dict:
51+
existing = dict(existing)
52+
dest[key] = existing
53+
stack.append((existing, value))
54+
55+
# Lists / tuples → concatenate
56+
elif (isinstance(existing, list) and isinstance(value, list)) or (
57+
isinstance(existing, tuple) and isinstance(value, tuple)
4558
):
46-
stack.append((d1[key], value))
59+
dest[key] = existing + deepcopy(value) # type: ignore[operator]
60+
61+
# Sets → union
62+
elif isinstance(existing, set) and isinstance(value, set):
63+
dest[key] = existing | deepcopy(value)
64+
65+
# Fallback → overwrite
4766
else:
48-
d1[key] = value
67+
dest[key] = deepcopy(value)
4968

50-
return merged
69+
return result

tests/config/app/test_deep_merge.py

Lines changed: 133 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,144 @@
66
@pytest.mark.parametrize(
77
"thedicts,expected",
88
[
9-
# 1
10-
([{"a": 1}, {"a": 2}], {"a": 2}),
11-
# 2
12-
([{"a": 1, "b": 2}, {"a": 2}], {"a": 2, "b": 2}),
13-
# 3
14-
(
9+
#
10+
pytest.param([], {}, id="empty"),
11+
#
12+
pytest.param([{"a": 1}, {"a": 2}], {"a": 2}, id="overwrite_scalar"),
13+
#
14+
pytest.param(
15+
[{"a": 1, "b": 2}, {"a": 2}],
16+
{"a": 2, "b": 2},
17+
id="merge_keys_overwrite_scalar",
18+
),
19+
#
20+
pytest.param(
1521
[{"db": {"host": "localhost", "port": 1234}}, {"db": {"port": 5432}}],
1622
{"db": {"host": "localhost", "port": 5432}},
23+
id="nested_dicts_recursive",
24+
),
25+
#
26+
pytest.param(
27+
[{"a": [1, 2]}, {"a": [3, 4]}],
28+
{"a": [1, 2, 3, 4]},
29+
id="list_concatenation",
30+
),
31+
#
32+
pytest.param(
33+
[{"t": (1, 2)}, {"t": (3, 4)}],
34+
{"t": (1, 2, 3, 4)},
35+
id="tuple_concatenation",
36+
),
37+
#
38+
pytest.param(
39+
[{"a": [1, 2]}, {"a": 3}],
40+
{"a": 3},
41+
id="overwrite_list_with_scalar",
1742
),
1843
#
44+
pytest.param(
45+
[{"a": 1}, {"a": [2, 3]}],
46+
{"a": [2, 3]},
47+
id="overwrite_scalar_with_list",
48+
),
49+
#
50+
pytest.param(
51+
[{"x": {"l": ["a"]}}, {"x": {"l": ["b"]}}],
52+
{"x": {"l": ["a", "b"]}},
53+
id="nested_list_concatenation",
54+
),
55+
#
56+
pytest.param(
57+
[{"a": [{"x": 1}]}, {"a": [{"y": 2}]}],
58+
{"a": [{"x": 1}, {"y": 2}]},
59+
id="list_of_dicts_concatenation",
60+
),
61+
#
62+
pytest.param(
63+
[{"s": {1, 2}}, {"s": {3, 4}}],
64+
{"s": {1, 2, 3, 4}},
65+
id="set_union",
66+
),
67+
#
68+
pytest.param(
69+
[{"s": {1, 2}}, {"s": {2, 3}}],
70+
{"s": {1, 2, 3}},
71+
id="set_union_overlap",
72+
),
1973
],
2074
)
2175
def test_deep_merge(thedicts, expected):
2276
assert deep_merge(*thedicts) == expected
77+
78+
79+
def test_deep_merge_is_truly_deep():
80+
d1 = {"nested": [1, 2], "deep": {"a": 1}}
81+
d2 = {}
82+
merged = deep_merge(d1, d2)
83+
84+
# Modify result
85+
merged["nested"].append(3)
86+
merged["deep"]["b"] = 2
87+
88+
# Assert original is untouched
89+
assert d1["nested"] == [1, 2]
90+
assert "b" not in d1["deep"]
91+
92+
93+
def test_three_way_merge():
94+
d1 = {"a": 1}
95+
d2 = {"b": 2}
96+
d3 = {"c": 3}
97+
assert deep_merge(d1, d2, d3) == {"a": 1, "b": 2, "c": 3}
98+
99+
100+
def test_deep_merge_inputs_remain_independent():
101+
# Verify that subsequent dictionaries (not just the first) are also treated as immutable sources
102+
d1 = {}
103+
d2 = {"a": [1, 2], "b": {"x": 1}}
104+
merged = deep_merge(d1, d2)
105+
106+
# Modify result
107+
merged["a"].append(3)
108+
merged["b"]["y"] = 2
109+
110+
# Assert input d2 is untouched
111+
assert d2["a"] == [1, 2]
112+
assert "y" not in d2["b"]
113+
114+
115+
def test_deep_merge_return_type():
116+
class MyMap(dict):
117+
pass
118+
119+
m1 = MyMap({"a": 1})
120+
m2 = {"b": 2}
121+
result = deep_merge(m1, m2)
122+
123+
assert isinstance(result, dict)
124+
assert not isinstance(result, MyMap)
125+
assert result == {"a": 1, "b": 2}
126+
127+
128+
def test_deep_merge_converts_nested_custom_mapping_to_dict():
129+
class MyMap(dict):
130+
pass
131+
132+
# MyMap instance nested inside a standard dict
133+
d1 = {"nested": MyMap({"a": 1})}
134+
d2 = {"nested": {"b": 2}}
135+
136+
result = deep_merge(d1, d2)
137+
138+
# Check that merge happened correctly
139+
assert result == {"nested": {"a": 1, "b": 2}}
140+
141+
# Check that the nested dictionary is a plain dict, not MyMap
142+
# This verifies that the line `existing = dict(existing)` was executed
143+
# and the result contains the converted dict.
144+
assert type(result["nested"]) is dict
145+
assert not isinstance(result["nested"], MyMap)
146+
147+
# Verify original d1 is untouched
148+
assert isinstance(d1["nested"], MyMap)
149+
assert d1["nested"] == {"a": 1}

0 commit comments

Comments
 (0)