Skip to content

Commit 522ed1a

Browse files
committed
Refactor deep_merge
The previous implementation didn't deep-copy values. The current implementation fixes this and adds additional tests to cover also tuples and sets.
1 parent 4ad8a0c commit 522ed1a

2 files changed

Lines changed: 122 additions & 26 deletions

File tree

src/docbuild/config/merge.py

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,24 @@
11
"""Merge multiple dictionaries into a new one without modifying inputs."""
22

33
from collections.abc import Mapping
4+
from copy import deepcopy
45
from typing import Any
56

67

78
def deep_merge(
8-
# dct1: ,
9-
*dcts: dict[str, Any],
9+
*dcts: Mapping[str, Any],
1010
) -> dict[str, Any]:
1111
"""Merge multiple dictionaries into a new one without modifying inputs.
1212
1313
Make a deep copy of the first dictionary and then update the copy with the
1414
subsequent dictionaries:
1515
16-
* If a key exists in both dictionaries, the value from the last dictionary
17-
will overwrite the previous one.
18-
* If the value is a list, it will concatenate the lists.
19-
* If the value is a primitive type, it will overwrite the value.
20-
* If a key exists in multiple dictionaries, the last one will take precedence.
16+
* If a key exists in both dictionaries and both values are Mappings,
17+
they will be merged recursively.
18+
* If both values are lists or tuples, they will be concatenated.
19+
* If both values are sets, they will be unioned.
20+
* Otherwise (different types or primitive values), the value from the
21+
subsequent dictionary will overwrite the previous one.
2122
2223
This means that the order of dictionaries matters. The first dictionary
2324
will be the base, and the subsequent dictionaries will update it.
@@ -29,22 +30,27 @@ def deep_merge(
2930
if not dcts:
3031
return {}
3132

32-
# Start with a shallow copy of the first dictionary
33-
merged = dcts[0].copy()
33+
merged: dict[str, Any] = deepcopy(dict(dcts[0]))
3434

3535
for d in dcts[1:]:
36-
stack = [(merged, d)]
37-
38-
while stack:
39-
d1, d2 = stack.pop()
40-
for key, value in d2.items():
41-
if (
42-
key in merged
43-
and isinstance(d1[key], dict | Mapping)
44-
and isinstance(value, dict | Mapping)
45-
):
46-
stack.append((d1[key], value))
47-
else:
48-
d1[key] = value
36+
for key, value in d.items():
37+
if key in merged:
38+
if isinstance(merged[key], Mapping) and isinstance(value, Mapping):
39+
merged[key] = deep_merge(merged[key], value)
40+
continue
41+
42+
is_list = isinstance(merged[key], list) and isinstance(value, list)
43+
is_tuple = isinstance(merged[key], tuple) and isinstance(value, tuple)
44+
is_set = isinstance(merged[key], set) and isinstance(value, set)
45+
46+
if is_list or is_tuple:
47+
merged[key] = merged[key] + deepcopy(value)
48+
continue
49+
50+
if is_set:
51+
merged[key] = merged[key] | deepcopy(value)
52+
continue
53+
54+
merged[key] = deepcopy(value)
4955

5056
return merged

tests/config/app/test_deep_merge.py

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,107 @@
66
@pytest.mark.parametrize(
77
"thedicts,expected",
88
[
9-
# 1
9+
# 1: two dicts with a common key (last one wins)
1010
([{"a": 1}, {"a": 2}], {"a": 2}),
11-
# 2
11+
# 2: two dicts with a common key and another key
12+
# (last one wins for common key, other key is preserved)
1213
([{"a": 1, "b": 2}, {"a": 2}], {"a": 2, "b": 2}),
13-
# 3
14+
# 3: nested dictionaries
1415
(
1516
[{"db": {"host": "localhost", "port": 1234}}, {"db": {"port": 5432}}],
1617
{"db": {"host": "localhost", "port": 5432}},
1718
),
18-
#
19+
# 4: Lists
20+
(
21+
[{"a": [1, 2]}, {"a": [3, 4]}],
22+
{"a": [1, 2, 3, 4]},
23+
),
24+
# 5: Mixed types (overwrite list with int)
25+
(
26+
[{"a": [1, 2]}, {"a": 3}],
27+
{"a": 3},
28+
),
29+
# 6: Mixed types (overwrite int with list)
30+
(
31+
[{"a": 1}, {"a": [2, 3]}],
32+
{"a": [2, 3]},
33+
),
34+
# 7: Nested lists
35+
(
36+
[{"x": {"l": ["a"]}}, {"x": {"l": ["b"]}}],
37+
{"x": {"l": ["a", "b"]}},
38+
),
39+
# 8: Lists containing dictionaries
40+
(
41+
[{"a": [{"x": 1}]}, {"a": [{"y": 2}]}],
42+
{"a": [{"x": 1}, {"y": 2}]},
43+
),
44+
# 9: Tuples
45+
(
46+
[{"t": (1, 2)}, {"t": (3, 4)}],
47+
{"t": (1, 2, 3, 4)},
48+
),
49+
# 10: Sets
50+
(
51+
[{"s": {1, 2}}, {"s": {3, 4}}],
52+
{"s": {1, 2, 3, 4}},
53+
),
54+
# 11: Sets with overlap
55+
(
56+
[{"s": {1, 2}}, {"s": {2, 3}}],
57+
{"s": {1, 2, 3}},
58+
),
1959
],
2060
)
2161
def test_deep_merge(thedicts, expected):
2262
assert deep_merge(*thedicts) == expected
63+
64+
65+
def test_deep_merge_is_truly_deep():
66+
d1 = {"nested": [1, 2], "deep": {"a": 1}}
67+
d2 = {}
68+
merged = deep_merge(d1, d2)
69+
70+
# Modify result
71+
merged["nested"].append(3)
72+
merged["deep"]["b"] = 2
73+
74+
# Assert original is untouched
75+
assert d1["nested"] == [1, 2]
76+
assert "b" not in d1["deep"]
77+
78+
79+
def test_three_way_merge():
80+
d1 = {"a": 1}
81+
d2 = {"b": 2}
82+
d3 = {"c": 3}
83+
assert deep_merge(d1, d2, d3) == {"a": 1, "b": 2, "c": 3}
84+
85+
86+
def test_deep_merge_inputs_remain_independent():
87+
# Verify that subsequent dictionaries (not just the first) are also treated as immutable sources
88+
d1 = {}
89+
d2 = {"a": [1, 2], "b": {"x": 1}}
90+
merged = deep_merge(d1, d2)
91+
92+
# Modify result
93+
merged["a"].append(3)
94+
merged["b"]["y"] = 2
95+
96+
# Assert input d2 is untouched
97+
assert d2["a"] == [1, 2]
98+
assert "y" not in d2["b"]
99+
100+
101+
def test_deep_merge_return_type():
102+
class MyMap(dict):
103+
pass
104+
105+
m1 = MyMap({"a": 1})
106+
m2 = {"b": 2}
107+
result = deep_merge(m1, m2)
108+
109+
assert isinstance(result, dict)
110+
assert not isinstance(result, MyMap)
111+
assert result == {"a": 1, "b": 2}
112+

0 commit comments

Comments
 (0)