diff --git a/CHANGES/783.feature b/CHANGES/783.feature
new file mode 100644
index 000000000..c756df555
--- /dev/null
+++ b/CHANGES/783.feature
@@ -0,0 +1 @@
+Added ``to_dict()`` method to export multidict contents as a standard dict with value lists. (#783)
diff --git a/multidict/_abc.py b/multidict/_abc.py
index 54253e9e7..897eaae77 100644
--- a/multidict/_abc.py
+++ b/multidict/_abc.py
@@ -42,6 +42,10 @@ def getone(self, key: str, default: _T) -> Union[_V_co, _T]: ...
     def getone(self, key: str, default: _T = ...) -> Union[_V_co, _T]:
         """Return first value for key."""
 
+    @abc.abstractmethod
+    def to_dict(self) -> dict[str, list[_V_co]]:
+        """Return a dict with lists of all values for each key."""
+
 
 class MutableMultiMapping(MultiMapping[_V], MutableMapping[str, _V]):
     @abc.abstractmethod
diff --git a/multidict/_multidict.c b/multidict/_multidict.c
index 73c002296..a002494c9 100644
--- a/multidict/_multidict.c
+++ b/multidict/_multidict.c
@@ -245,6 +245,86 @@ _multidict_proxy_copy(MultiDictProxyObject *self, PyTypeObject *type)
     return multidict_copy(self->md);
 }
 
+PyDoc_STRVAR(multidict_to_dict_doc,
+             "Return a dict with lists of all values for each key.");
+
+/* Build a plain dict that maps every distinct key to a list of its values.
+ *
+ * Entries are grouped by identity; each list is stored under the key of the
+ * first entry seen for that identity and receives later values by append.
+ * Returns a new reference, or NULL on failure.
+ */
+static PyObject *
+multidict_to_dict(MultiDictObject *self)
+{
+    PyObject *result = PyDict_New();
+    if (result == NULL) {
+        return NULL;
+    }
+
+    /* identity -> value list (the very list object stored in `result`) */
+    PyObject *seen = PyDict_New();
+    if (seen == NULL) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    md_pos_t pos;
+    md_init_pos(self, &pos);
+    PyObject *identity = NULL;
+    PyObject *key = NULL;
+    PyObject *value = NULL;
+
+    int tmp;
+    while ((tmp = md_next(self, &pos, &identity, &key, &value)) > 0) {
+        /* Borrowed reference; NULL is either "missing" or a failed lookup. */
+        PyObject *lst = PyDict_GetItemWithError(seen, identity);
+        if (lst == NULL) {
+            if (PyErr_Occurred()) {
+                goto fail;
+            }
+            lst = PyList_New(1);
+            if (lst == NULL) {
+                goto fail;
+            }
+            /* PyList_SET_ITEM steals our reference to value. */
+            PyList_SET_ITEM(lst, 0, value);
+            value = NULL;
+            if (PyDict_SetItem(seen, identity, lst) < 0 ||
+                PyDict_SetItem(result, key, lst) < 0) {
+                Py_DECREF(lst);
+                goto fail;
+            }
+            /* Both dicts hold their own references to the list now. */
+            Py_DECREF(lst);
+        } else {
+            if (PyList_Append(lst, value) < 0) {
+                goto fail;
+            }
+            Py_DECREF(value);
+            value = NULL;
+        }
+        Py_DECREF(identity);
+        Py_DECREF(key);
+        identity = NULL;
+        key = NULL;
+    }
+    if (tmp < 0) {
+        goto fail;
+    }
+
+    Py_DECREF(seen);
+    return result;
+
+fail:
+    Py_XDECREF(identity);
+    Py_XDECREF(key);
+    Py_XDECREF(value);
+    Py_DECREF(seen);
+    Py_DECREF(result);
+    return NULL;
+}
+
 /******************** Base Methods ********************/
 
 static inline PyObject *
@@ -887,6 +967,10 @@ static PyMethodDef multidict_methods[] = {
      METH_FASTCALL | METH_KEYWORDS,
      multidict_add_doc},
     {"copy", (PyCFunction)multidict_copy, METH_NOARGS, multidict_copy_doc},
+    {"to_dict",
+     (PyCFunction)multidict_to_dict,
+     METH_NOARGS,
+     multidict_to_dict_doc},
     {"extend",
      (PyCFunction)multidict_extend,
      METH_VARARGS | METH_KEYWORDS,
@@ -1144,6 +1228,13 @@ multidict_proxy_reduce(MultiDictProxyObject *self)
     return NULL;
 }
 
+/* Proxy variant: export the contents of the wrapped multidict. */
+static PyObject *
+multidict_proxy_to_dict(MultiDictProxyObject *self)
+{
+    return multidict_to_dict(self->md);
+}
+
 static Py_ssize_t
 multidict_proxy_mp_len(MultiDictProxyObject *self)
 {
@@ -1245,6 +1336,10 @@ static PyMethodDef multidict_proxy_methods[] = {
      (PyCFunction)Py_GenericAlias,
      METH_O | METH_CLASS,
      NULL},
+    {"to_dict",
+     (PyCFunction)multidict_proxy_to_dict,
+     METH_NOARGS,
+     multidict_to_dict_doc},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/multidict/_multidict_py.py b/multidict/_multidict_py.py
index ef6c03c7f..e95d1b602 100644
--- a/multidict/_multidict_py.py
+++ b/multidict/_multidict_py.py
@@ -772,6 +772,25 @@ def __sizeof__(self) -> int:
     def __reduce__(self) -> tuple[type[Self], tuple[list[tuple[str, _V]]]]:
         return (self.__class__, (list(self.items()),))
 
+    def to_dict(self) -> dict[str, list[_V]]:
+        """Return a dict with lists of all values for each key.
+
+        Entries are grouped by identity; every list is stored under the
+        first key seen for its identity and collects the values in
+        iteration order.  The dict and its lists are created anew on
+        each call.
+        """
+        result: dict[str, list[_V]] = {}
+        # identity -> the value list already stored in ``result``
+        groups: dict[str, list[_V]] = {}
+        for e in self._keys.iter_entries():
+            values = groups.get(e.identity)
+            if values is None:
+                groups[e.identity] = result[self._key(e.key)] = [e.value]
+            else:
+                values.append(e.value)
+        return result
+
     def add(self, key: str, value: _V) -> None:
         identity = self._identity(key)
         hash_ = hash(identity)
@@ -1212,6 +1231,10 @@ def __repr__(self) -> str:
         body = ", ".join(f"'{k}': {v!r}" for k, v in self.items())
         return f"<{self.__class__.__name__}({body})>"
 
+    def to_dict(self) -> dict[str, list[_V]]:
+        """Return a dict with lists of all values for each key."""
+        return self._md.to_dict()
+
     def copy(self) -> MultiDict[_V]:
         """Return a copy of itself."""
         return MultiDict(self._md)
diff --git a/tests/isolated/multidict_to_dict.py b/tests/isolated/multidict_to_dict.py
new file mode 100644
index 000000000..f8bc38942
--- /dev/null
+++ b/tests/isolated/multidict_to_dict.py
@@ -0,0 +1,46 @@
+"""Memory leak test for to_dict()."""
+
+import gc
+import os
+
+import psutil
+from multidict import MultiDict
+
+
+process = psutil.Process(os.getpid())
+
+
+def trim_ram() -> None:
+    """Run a garbage collection pass before memory is sampled."""
+    gc.collect()
+
+
+def get_memory_usage() -> int:
+    """Return the resident set size of this process in whole MiB."""
+    memory_info = process.memory_info()
+    return memory_info.rss // (1024 * 1024)
+
+
+def test_to_dict_leak() -> None:
+    """Check that repeated to_dict() calls do not grow process memory."""
+    # Values are fresh lists and "a" repeats: a leaked reference then costs
+    # real memory, and both the first-value and the append paths are run.
+    for _ in range(100):
+        d = MultiDict([("a", [1]), ("b", [2]), ("a", [3])])
+        d.to_dict()
+    trim_ram()
+
+    mem_before = get_memory_usage()
+    for _ in range(1_000_000):
+        d = MultiDict([("a", [1]), ("b", [2]), ("a", [3])])
+        d.to_dict()
+    trim_ram()
+    mem_after = get_memory_usage()
+
+    growth = mem_after - mem_before
+    assert growth < 50, f"Memory grew by {growth} MB, possible leak"
+
+
+if __name__ == "__main__":
+    test_to_dict_leak()
+    print("PASSED: No memory leak detected in to_dict()")
diff --git a/tests/test_leaks.py b/tests/test_leaks.py
index 56126d4bc..29949e76e 100644
--- a/tests/test_leaks.py
+++ b/tests/test_leaks.py
@@ -16,6 +16,7 @@
         "multidict_extend_tuple.py",
         "multidict_update_multidict.py",
         "multidict_pop.py",
+        "multidict_to_dict.py",
     ),
 )
 @pytest.mark.leaks
diff --git a/tests/test_to_dict.py b/tests/test_to_dict.py
new file mode 100644
index 000000000..b2a860acd
--- /dev/null
+++ b/tests/test_to_dict.py
@@ -0,0 +1,180 @@
+"""Test to_dict functionality for all multidict types."""
+
+from collections.abc import Iterable
+from typing import Optional, Protocol, Type
+
+import pytest
+
+from multidict import (
+    CIMultiDict,
+    CIMultiDictProxy,
+    MultiDict,
+    MultiDictProxy,
+    MultiMapping,
+)
+
+
+class MultidictModule(Protocol):
+    MultiDict: Type[MultiDict[object]]
+    CIMultiDict: Type[CIMultiDict[object]]
+    MultiDictProxy: Type[MultiDictProxy[object]]
+    CIMultiDictProxy: Type[CIMultiDictProxy[object]]
+
+
+class DictFactory(Protocol):
+    def __call__(
+        self, arg: Optional[Iterable[tuple[str, object]]] = None
+    ) -> MultiMapping[object]:
+        raise NotImplementedError  # pragma: no cover
+
+
+class BaseToDictTests:
+    """Base tests for to_dict() method, inherited by all multidict type tests."""
+
+    def test_to_dict_simple(self, cls: DictFactory) -> None:
+        """Test basic conversion with unique keys."""
+        d = cls([("a", 1), ("b", 2)])
+        result = d.to_dict()
+        assert result == {"a": [1], "b": [2]}
+
+    def test_to_dict_multi_values(self, cls: DictFactory) -> None:
+        """Test grouping multiple values under the same key."""
+        d = cls([("a", 1), ("b", 2), ("a", 3)])
+        result = d.to_dict()
+        assert result == {"a": [1, 3], "b": [2]}
+
+    def test_to_dict_empty(self, cls: DictFactory) -> None:
+        """Test conversion of an empty multidict."""
+        d = cls()
+        result = d.to_dict()
+        assert result == {}
+
+    def test_to_dict_returns_new_dict(self, cls: DictFactory) -> None:
+        """Test that each call returns a new dictionary instance."""
+        d = cls([("a", 1)])
+        result1 = d.to_dict()
+        result2 = d.to_dict()
+        assert result1 == result2
+        assert result1 is not result2
+
+    def test_to_dict_list_is_fresh(self, cls: DictFactory) -> None:
+        """Test that value lists are independent between calls."""
+        d = cls([("a", 1)])
+        result1 = d.to_dict()
+        result2 = d.to_dict()
+        assert result1["a"] is not result2["a"]
+
+    def test_to_dict_order_preservation(self, cls: DictFactory) -> None:
+        """Test that value lists maintain insertion order."""
+        d = cls([("x", 3), ("x", 1), ("x", 2)])
+        result = d.to_dict()
+        assert result["x"] == [3, 1, 2]
+
+    def test_to_dict_large_data(self, cls: DictFactory) -> None:
+        """Test to_dict with a large number of entries for performance."""
+        items = [(f"key{i % 100}", i) for i in range(10000)]
+        d = cls(items)
+        result = d.to_dict()
+        assert len(result) == 100
+        assert all(len(v) == 100 for v in result.values())
+
+    def test_to_dict_mixed_value_types(self, cls: DictFactory) -> None:
+        """Test to_dict with mixed value types (str, int) to verify generic _V."""
+        d = cls([("a", 1), ("a", "two"), ("b", 3.14)])
+        result = d.to_dict()
+        assert result["a"] == [1, "two"]
+        assert result["b"] == [3.14]
+
+
+class TestMultiDictToDict(BaseToDictTests):
+    """Tests for MultiDict.to_dict()."""
+
+    @pytest.fixture
+    def cls(self, multidict_module: MultidictModule) -> Type[MultiDict[object]]:
+        return multidict_module.MultiDict
+
+
+class TestCIMultiDictToDict(BaseToDictTests):
+    """Tests for CIMultiDict.to_dict()."""
+
+    @pytest.fixture
+    def cls(self, multidict_module: MultidictModule) -> Type[CIMultiDict[object]]:
+        return multidict_module.CIMultiDict
+
+    def test_to_dict_case_insensitive_grouping(self, cls: DictFactory) -> None:
+        """Test that case variants are grouped under the same key."""
+        d = cls([("A", 1), ("a", 2), ("B", 3)])
+        result = d.to_dict()
+        assert len(result) == 2
+        assert "A" in result or "a" in result
+        assert "B" in result or "b" in result
+        key_a = "A" if "A" in result else "a"
+        key_b = "B" if "B" in result else "b"
+        assert result[key_a] == [1, 2]
+        assert result[key_b] == [3]
+
+
+class TestMultiDictProxyToDict(BaseToDictTests):
+    """Tests for MultiDictProxy.to_dict()."""
+
+    @pytest.fixture
+    def cls(self, multidict_module: MultidictModule) -> DictFactory:
+        def make_proxy(
+            arg: Optional[Iterable[tuple[str, object]]] = None,
+        ) -> MultiMapping[object]:
+            md: MultiDict[object] = (
+                multidict_module.MultiDict(arg) if arg else multidict_module.MultiDict()
+            )
+            return multidict_module.MultiDictProxy(md)
+
+        return make_proxy
+
+    def test_to_dict_proxy_mutation_isolation(
+        self, cls: DictFactory, multidict_module: MultidictModule
+    ) -> None:
+        """Test that modifying returned dict does not affect the proxy."""
+        md: MultiDict[object] = multidict_module.MultiDict([("a", 1)])
+        proxy: MultiMapping[object] = multidict_module.MultiDictProxy(md)
+        result = proxy.to_dict()
+        result["a"].append(999)
+        assert proxy.getall("a") == [1]
+
+
+class TestCIMultiDictProxyToDict(BaseToDictTests):
+    """Tests for CIMultiDictProxy.to_dict()."""
+
+    @pytest.fixture
+    def cls(self, multidict_module: MultidictModule) -> DictFactory:
+        def make_proxy(
+            arg: Optional[Iterable[tuple[str, object]]] = None,
+        ) -> MultiMapping[object]:
+            md: CIMultiDict[object] = (
+                multidict_module.CIMultiDict(arg)
+                if arg
+                else multidict_module.CIMultiDict()
+            )
+            return multidict_module.CIMultiDictProxy(md)
+
+        return make_proxy
+
+    def test_to_dict_case_insensitive_grouping(self, cls: DictFactory) -> None:
+        """Test that case variants are grouped under the same key."""
+        d = cls([("A", 1), ("a", 2), ("B", 3)])
+        result = d.to_dict()
+        assert len(result) == 2
+        assert "A" in result or "a" in result
+        assert "B" in result or "b" in result
+        key_a = "A" if "A" in result else "a"
+        key_b = "B" if "B" in result else "b"
+        assert result[key_a] == [1, 2]
+        assert result[key_b] == [3]
+
+    def test_to_dict_proxy_mutation_isolation(
+        self, cls: DictFactory, multidict_module: MultidictModule
+    ) -> None:
+        """Test that modifying returned dict does not affect the proxy."""
+        md: CIMultiDict[object] = multidict_module.CIMultiDict([("a", 1)])
+        proxy: MultiMapping[object] = multidict_module.CIMultiDictProxy(md)
+        result = proxy.to_dict()
+        result["a"].append(999)
+        assert proxy.getall("a") == [1]