diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 3f2b35b2..1e6bf7e4 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -309,22 +309,15 @@ class DuckDBPyConnection: strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... def from_df(self, df: pandas.DataFrame) -> DuckDBPyRelation: ... - @typing.overload - def from_parquet( - self, - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: ParquetCompression | None = None, - ) -> DuckDBPyRelation: ... - @typing.overload def from_parquet( self, - file_globs: Sequence[str], + path_or_buffer: str + | bytes + | os.PathLike[str] + | os.PathLike[bytes] + | typing.IO[bytes] + | typing.IO[str] + | Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]], binary_as_string: bool = False, *, file_row_number: bool = False, @@ -433,22 +426,15 @@ class DuckDBPyConnection: hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, ) -> DuckDBPyRelation: ... - @typing.overload - def read_parquet( - self, - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: ParquetCompression | None = None, - ) -> DuckDBPyRelation: ... - @typing.overload def read_parquet( self, - file_globs: Sequence[str], + path_or_buffer: str + | bytes + | os.PathLike[str] + | os.PathLike[bytes] + | typing.IO[bytes] + | typing.IO[str] + | Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]], binary_as_string: bool = False, *, file_row_number: bool = False, @@ -1061,21 +1047,14 @@ def from_csv_auto( strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... def from_df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... -@typing.overload -def from_parquet( - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: ParquetCompression | None = None, - connection: DuckDBPyConnection | None = None, -) -> DuckDBPyRelation: ... -@typing.overload def from_parquet( - file_globs: Sequence[str], + path_or_buffer: str + | bytes + | os.PathLike[str] + | os.PathLike[bytes] + | typing.IO[bytes] + | typing.IO[str] + | Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]], binary_as_string: bool = False, *, file_row_number: bool = False, @@ -1232,21 +1211,14 @@ def read_json( hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, ) -> DuckDBPyRelation: ... -@typing.overload -def read_parquet( - file_glob: str, - binary_as_string: bool = False, - *, - file_row_number: bool = False, - filename: bool = False, - hive_partitioning: bool = False, - union_by_name: bool = False, - compression: ParquetCompression | None = None, - connection: DuckDBPyConnection | None = None, -) -> DuckDBPyRelation: ... -@typing.overload def read_parquet( - file_globs: Sequence[str], + path_or_buffer: str + | bytes + | os.PathLike[str] + | os.PathLike[bytes] + | typing.IO[bytes] + | typing.IO[str] + | Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]], binary_as_string: bool = False, *, file_row_number: bool = False, diff --git a/scripts/connection_methods.json b/scripts/connection_methods.json index 56398af0..7ba70523 100644 --- a/scripts/connection_methods.json +++ b/scripts/connection_methods.json @@ -941,11 +941,11 @@ "read_parquet" ], "function": "FromParquet", - "docs": "Create a relation object from the Parquet files in file_glob", + "docs": "Create a relation object from the Parquet path(s) or file-like object(s) in 'path_or_buffer'", "args": [ { - "name": "file_glob", - "type": "str" + "name": "path_or_buffer", + "type": "Union[str, bytes, os.PathLike, IO[bytes], IO[str], Sequence[Union[str, bytes, os.PathLike, IO[bytes], IO[str]]]]" }, { "name": "binary_as_string", @@ -982,53 +982,6 @@ ], "return": "DuckDBPyRelation" }, - { - "name": [ - "from_parquet", - "read_parquet" - ], - "function": "FromParquets", - "docs": "Create a relation object from the Parquet files in file_globs", - "args": [ - { - "name": "file_globs", - "type": "List[str]" - }, - { - "name": "binary_as_string", - "default": "False", - "type": "bool" - } - ], - "kwargs": [ - { - "name": "file_row_number", - "default": "False", - "type": "bool" - }, - { - "name": "filename", - "default": "False", - "type": "bool" - }, - { - "name": "hive_partitioning", - "default": "False", - "type": "bool" - }, - { - "name": "union_by_name", - "default": "False", - "type": "bool" - }, - { - "name": "compression", - "default": "None", - "type": "str" - } - ], - "return": "DuckDBPyRelation" - }, { "name": "get_table_names", "function": "GetTableNames", diff --git a/src/duckdb_py/duckdb_python.cpp b/src/duckdb_py/duckdb_python.cpp index d950960d..ea2ac66d 100644 --- a/src/duckdb_py/duckdb_python.cpp +++ b/src/duckdb_py/duckdb_python.cpp @@ -748,64 +748,34 @@ static void InitializeConnectionMethods(py::module_ &m) { py::arg("connection") = py::none()); m.def( "from_parquet", - [](const string &file_glob, bool binary_as_string, bool file_row_number, bool filename, bool hive_partitioning, - bool union_by_name, const py::object &compression = py::none(), - shared_ptr conn = nullptr) { - if (!conn) { - conn = DuckDBPyConnection::DefaultConnection(); - } - return conn->FromParquet(file_glob, binary_as_string, file_row_number, filename, hive_partitioning, - union_by_name, compression); - }, - "Create a relation object from the Parquet files in file_glob", py::arg("file_glob"), - py::arg("binary_as_string") = false, py::kw_only(), py::arg("file_row_number") = false, - py::arg("filename") = false, py::arg("hive_partitioning") = false, py::arg("union_by_name") = false, - py::arg("compression") = py::none(), py::arg("connection") = py::none()); - m.def( - "read_parquet", - [](const string &file_glob, bool binary_as_string, bool file_row_number, bool filename, bool hive_partitioning, - bool union_by_name, const py::object &compression = py::none(), - shared_ptr conn = nullptr) { - if (!conn) { - conn = DuckDBPyConnection::DefaultConnection(); - } - return conn->FromParquet(file_glob, binary_as_string, file_row_number, filename, hive_partitioning, - union_by_name, compression); - }, - "Create a relation object from the Parquet files in file_glob", py::arg("file_glob"), - py::arg("binary_as_string") = false, py::kw_only(), py::arg("file_row_number") = false, - py::arg("filename") = false, py::arg("hive_partitioning") = false, py::arg("union_by_name") = false, - py::arg("compression") = py::none(), py::arg("connection") = py::none()); - m.def( - "from_parquet", - [](const vector &file_globs, bool binary_as_string, bool file_row_number, bool filename, + [](const py::object &path_or_buffer, bool binary_as_string, bool file_row_number, bool filename, bool hive_partitioning, bool union_by_name, const py::object &compression = py::none(), shared_ptr conn = nullptr) { if (!conn) { conn = DuckDBPyConnection::DefaultConnection(); } - return conn->FromParquets(file_globs, binary_as_string, file_row_number, filename, hive_partitioning, - union_by_name, compression); + return conn->FromParquet(path_or_buffer, binary_as_string, file_row_number, filename, hive_partitioning, + union_by_name, compression); }, - "Create a relation object from the Parquet files in file_globs", py::arg("file_globs"), - py::arg("binary_as_string") = false, py::kw_only(), py::arg("file_row_number") = false, - py::arg("filename") = false, py::arg("hive_partitioning") = false, py::arg("union_by_name") = false, - py::arg("compression") = py::none(), py::arg("connection") = py::none()); + "Create a relation object from the Parquet path(s) or file-like object(s) in 'path_or_buffer'", + py::arg("path_or_buffer"), py::arg("binary_as_string") = false, py::kw_only(), + py::arg("file_row_number") = false, py::arg("filename") = false, py::arg("hive_partitioning") = false, + py::arg("union_by_name") = false, py::arg("compression") = py::none(), py::arg("connection") = py::none()); m.def( "read_parquet", - [](const vector &file_globs, bool binary_as_string, bool file_row_number, bool filename, + [](const py::object &path_or_buffer, bool binary_as_string, bool file_row_number, bool filename, bool hive_partitioning, bool union_by_name, const py::object &compression = py::none(), shared_ptr conn = nullptr) { if (!conn) { conn = DuckDBPyConnection::DefaultConnection(); } - return conn->FromParquets(file_globs, binary_as_string, file_row_number, filename, hive_partitioning, - union_by_name, compression); + return conn->FromParquet(path_or_buffer, binary_as_string, file_row_number, filename, hive_partitioning, + union_by_name, compression); }, - "Create a relation object from the Parquet files in file_globs", py::arg("file_globs"), - py::arg("binary_as_string") = false, py::kw_only(), py::arg("file_row_number") = false, - py::arg("filename") = false, py::arg("hive_partitioning") = false, py::arg("union_by_name") = false, - py::arg("compression") = py::none(), py::arg("connection") = py::none()); + "Create a relation object from the Parquet path(s) or file-like object(s) in 'path_or_buffer'", + py::arg("path_or_buffer"), py::arg("binary_as_string") = false, py::kw_only(), + py::arg("file_row_number") = false, py::arg("filename") = false, py::arg("hive_partitioning") = false, + py::arg("union_by_name") = false, py::arg("compression") = py::none(), py::arg("connection") = py::none()); m.def( "get_table_names", [](const string &query, bool qualified, shared_ptr conn = nullptr) { diff --git a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp index 0369c9a9..74cdf6ce 100644 --- a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp +++ b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp @@ -293,16 +293,9 @@ struct DuckDBPyConnection : public enable_shared_from_this { unique_ptr FromDF(const PandasDataFrame &value); - unique_ptr FromParquet(const string &file_glob, bool binary_as_string, bool file_row_number, - bool filename, bool hive_partitioning, bool union_by_name, - const py::object &compression = py::none()); - unique_ptr FromParquets(const vector &file_globs, bool binary_as_string, - bool file_row_number, bool filename, bool hive_partitioning, - bool union_by_name, const py::object &compression = py::none()); - - unique_ptr FromParquetInternal(Value &&file_param, bool binary_as_string, bool file_row_number, - bool filename, bool hive_partitioning, bool union_by_name, - const py::object &compression = py::none()); + unique_ptr FromParquet(const py::object &path_or_buffer, bool binary_as_string, + bool file_row_number, bool filename, bool hive_partitioning, + bool union_by_name, const py::object &compression = py::none()); unique_ptr FromArrow(py::object &arrow_object); diff --git a/src/duckdb_py/path_like.cpp b/src/duckdb_py/path_like.cpp index a69bcd42..7ab5eace 100644 --- a/src/duckdb_py/path_like.cpp +++ b/src/duckdb_py/path_like.cpp @@ -10,8 +10,7 @@ namespace duckdb { struct PathLikeProcessor { public: - PathLikeProcessor(DuckDBPyConnection &connection, PythonImportCache &import_cache) - : connection(connection), import_cache(import_cache) { + explicit PathLikeProcessor(DuckDBPyConnection &connection) : connection(connection) { } public: @@ -29,7 +28,6 @@ struct PathLikeProcessor { public: DuckDBPyConnection &connection; optional_ptr object_store; - PythonImportCache &import_cache; // The list containing every file vector all_files; // The list of files that are registered in the object_store; @@ -41,8 +39,10 @@ void PathLikeProcessor::AddFile(const py::object &object) { all_files.push_back(std::string(py::str(object))); return; } - if (py::isinstance(object, import_cache.pathlib.Path())) { - all_files.push_back(std::string(py::str(object))); + if (py::isinstance(object) || py::hasattr(object, "__fspath__")) { + // A bytes path or an os.PathLike object (e.g. pathlib.Path) - decode it to a string + auto fsdecode = py::module_::import("os").attr("fsdecode"); + all_files.push_back(std::string(py::str(fsdecode(object)))); return; } // This is (assumed to be) a file-like object @@ -79,9 +79,7 @@ PathLike PathLikeProcessor::Finalize() { } PathLike PathLike::Create(const py::object &object, DuckDBPyConnection &connection) { - auto &import_cache = *DuckDBPyConnection::ImportCache(); - - PathLikeProcessor processor(connection, import_cache); + PathLikeProcessor processor(connection); if (py::isinstance(object)) { auto list = py::list(object); for (auto &item : list) { diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp index 6fcbe0ac..bfab3f37 100644 --- a/src/duckdb_py/pyconnection.cpp +++ b/src/duckdb_py/pyconnection.cpp @@ -299,25 +299,15 @@ static void InitializeConnectionMethods(py::class_ DuckDBPyConnection::FromDF(const PandasDataFrame &v return CreateRelation(std::move(rel)); } -unique_ptr DuckDBPyConnection::FromParquetInternal(Value &&file_param, bool binary_as_string, - bool file_row_number, bool filename, - bool hive_partitioning, bool union_by_name, - const py::object &compression) { +unique_ptr DuckDBPyConnection::FromParquet(const py::object &path_or_buffer, bool binary_as_string, + bool file_row_number, bool filename, + bool hive_partitioning, bool union_by_name, + const py::object &compression) { auto &connection = con.GetConnection(); + auto path_like = GetPathLike(path_or_buffer); + auto file_like_object_wrapper = std::move(path_like.dependency); + string name = "parquet_" + StringUtil::GenerateRandomName(); + vector file_values; + for (auto &file : path_like.files) { + file_values.emplace_back(std::move(file)); + } vector params; - params.emplace_back(std::move(file_param)); + params.emplace_back(Value::LIST(LogicalType::VARCHAR, std::move(file_values))); named_parameter_map_t named_parameters({{"binary_as_string", Value::BOOLEAN(binary_as_string)}, {"file_row_number", Value::BOOLEAN(file_row_number)}, {"filename", Value::BOOLEAN(filename)}, @@ -1806,30 +1803,11 @@ unique_ptr DuckDBPyConnection::FromParquetInternal(Value &&fil } D_ASSERT(py::gil_check()); py::gil_scoped_release gil; - return CreateRelation(connection.TableFunction("parquet_scan", params, named_parameters)->Alias(name)); -} - -unique_ptr DuckDBPyConnection::FromParquet(const string &file_glob, bool binary_as_string, - bool file_row_number, bool filename, - bool hive_partitioning, bool union_by_name, - const py::object &compression) { - auto file_param = Value(file_glob); - return FromParquetInternal(std::move(file_param), binary_as_string, file_row_number, filename, hive_partitioning, - union_by_name, compression); -} - -unique_ptr DuckDBPyConnection::FromParquets(const vector &file_globs, bool binary_as_string, - bool file_row_number, bool filename, - bool hive_partitioning, bool union_by_name, - const py::object &compression) { - vector params; - auto file_globs_as_value = vector(); - for (const auto &file : file_globs) { - file_globs_as_value.emplace_back(file); + auto parquet_relation = connection.TableFunction("parquet_scan", params, named_parameters); + if (file_like_object_wrapper) { + parquet_relation->AddExternalDependency(std::move(file_like_object_wrapper)); } - auto file_param = Value::LIST(file_globs_as_value); - return FromParquetInternal(std::move(file_param), binary_as_string, file_row_number, filename, hive_partitioning, - union_by_name, compression); + return CreateRelation(parquet_relation->Alias(name)); } unique_ptr DuckDBPyConnection::FromArrow(py::object &arrow_object) { diff --git a/tests/fast/api/test_fsspec.py b/tests/fast/api/test_fsspec.py index f68415cb..65e2d85f 100644 --- a/tests/fast/api/test_fsspec.py +++ b/tests/fast/api/test_fsspec.py @@ -51,7 +51,7 @@ def __init__(self) -> None: fs = fsspec.filesystem("deadlock") duckdb_cursor.register_filesystem(fs) - result = duckdb_cursor.read_parquet(file_globs=["deadlock://a", "deadlock://b"], union_by_name=True) + result = duckdb_cursor.read_parquet(["deadlock://a", "deadlock://b"], union_by_name=True) assert len(result.fetchall()) == 100_000 def test_fsspec_seek_read_atomicity(self, duckdb_cursor, tmp_path): diff --git a/tests/fast/api/test_read_parquet.py b/tests/fast/api/test_read_parquet.py new file mode 100644 index 00000000..ebe47095 --- /dev/null +++ b/tests/fast/api/test_read_parquet.py @@ -0,0 +1,193 @@ +from io import BytesIO +from pathlib import Path +from typing import NoReturn + +import pytest + +import duckdb + + +@pytest.fixture +def parquet_file(tmp_path): + path = tmp_path / "integers.parquet" + duckdb.sql("SELECT i FROM range(10) t(i)").write_parquet(str(path)) + return path + + +@pytest.fixture +def parquet_files(tmp_path): + directory = tmp_path / "data" + directory.mkdir() + file1 = directory / "file1.parquet" + file2 = directory / "file2.parquet" + duckdb.sql("SELECT 1 AS i").write_parquet(str(file1)) + duckdb.sql("SELECT 2 AS i").write_parquet(str(file2)) + return file1, file2 + + +@pytest.fixture +def parquet_bytes(parquet_file): + return Path(parquet_file).read_bytes() + + +class TestReadParquet: + # Regression / backwards-compat + + def test_read_string(self, duckdb_cursor, parquet_file): + res = duckdb_cursor.read_parquet(str(parquet_file)).fetchall() + assert res == [(i,) for i in range(10)] + + def test_read_list_of_strings(self, duckdb_cursor, parquet_files): + file1, file2 = parquet_files + res = duckdb_cursor.read_parquet([str(file1), str(file2)]).order("i").fetchall() + assert res == [(1,), (2,)] + + def test_read_glob_string(self, duckdb_cursor, parquet_files): + glob = str(parquet_files[0].parent / "*.parquet") + res = duckdb_cursor.read_parquet(glob).order("i").fetchall() + assert res == [(1,), (2,)] + + def test_path_not_mangled(self, duckdb_cursor): + # The path string should be forwarded verbatim to the engine + with pytest.raises(duckdb.IOException, match="no_such_directory"): + duckdb_cursor.read_parquet("no_such_directory/*.parquet").fetchall() + + def test_options_still_thread_through(self, duckdb_cursor, parquet_file): + rel = duckdb_cursor.read_parquet( + parquet_file, + binary_as_string=True, + file_row_number=True, + filename=True, + hive_partitioning=False, + union_by_name=False, + ) + assert set(rel.columns) == {"i", "file_row_number", "filename"} + + def test_compression_option(self, duckdb_cursor, parquet_file): + res = duckdb_cursor.read_parquet(str(parquet_file), compression="snappy").fetchall() + assert res == [(i,) for i in range(10)] + with pytest.raises(duckdb.InvalidInputException, match="only accepts 'compression' as a string"): + duckdb_cursor.read_parquet(str(parquet_file), compression=42) + + # New capability: pathlib.Path + + def test_read_pathlib_path(self, duckdb_cursor, parquet_file): + res = duckdb_cursor.read_parquet(parquet_file).fetchall() + assert res == [(i,) for i in range(10)] + + def test_read_pathlib_path_glob(self, duckdb_cursor, parquet_files): + # Globs survive Path stringification and resolve downstream + glob = parquet_files[0].parent / "*.parquet" + res = duckdb_cursor.read_parquet(glob).order("i").fetchall() + assert res == [(1,), (2,)] + + def test_read_pathlike(self, duckdb_cursor, parquet_file): + class MyPath: + def __init__(self, path) -> None: + self._path = path + + def __fspath__(self) -> str: + return str(self._path) + + res = duckdb_cursor.read_parquet(MyPath(parquet_file)).fetchall() + assert res == [(i,) for i in range(10)] + + def test_read_bytes_path(self, duckdb_cursor, parquet_file): + res = duckdb_cursor.read_parquet(str(parquet_file).encode()).fetchall() + assert res == [(i,) for i in range(10)] + + def test_read_mixed_list(self, duckdb_cursor, parquet_files): + file1, file2 = parquet_files + res = duckdb_cursor.read_parquet([Path(file1), str(file2)]).order("i").fetchall() + assert res == [(1,), (2,)] + + def test_read_list_of_paths(self, duckdb_cursor, parquet_files): + file1, file2 = parquet_files + res = duckdb_cursor.read_parquet([Path(file1), Path(file2)]).order("i").fetchall() + assert res == [(1,), (2,)] + + # New capability: file-like objects + + def test_read_filelike(self, duckdb_cursor, parquet_bytes): + pytest.importorskip("fsspec") + res = duckdb_cursor.read_parquet(BytesIO(parquet_bytes)).fetchall() + assert res == [(i,) for i in range(10)] + + def test_read_filelike_list(self, duckdb_cursor, parquet_bytes): + pytest.importorskip("fsspec") + res = duckdb_cursor.read_parquet([BytesIO(parquet_bytes), BytesIO(parquet_bytes)]).fetchall() + assert res == [(i,) for i in range(10)] * 2 + + def test_read_filelike_filename_column(self, duckdb_cursor, parquet_bytes): + # The filename column exposes the generated internal name for file-like objects + pytest.importorskip("fsspec") + rel = duckdb_cursor.read_parquet(BytesIO(parquet_bytes), filename=True) + res = rel.fetchall() + assert all(row[1].startswith("DUCKDB_INTERNAL_OBJECTSTORE://") for row in res) + + def test_read_filelike_rel_out_of_scope(self, duckdb_cursor, parquet_bytes): + pytest.importorskip("fsspec") + + def keep_in_scope(): + # The relation keeps the registered file-like object alive + return duckdb_cursor.read_parquet(BytesIO(parquet_bytes)) + + def close_scope(): + return duckdb_cursor.read_parquet(BytesIO(parquet_bytes)).fetchall() + + relation = keep_in_scope() + res = relation.fetchall() + + res2 = close_scope() + assert res == res2 + + # All four entry points + + def test_module_read_parquet(self, parquet_file): + assert duckdb.read_parquet(parquet_file).fetchall() == [(i,) for i in range(10)] + + def test_module_from_parquet(self, parquet_file): + assert duckdb.from_parquet(parquet_file).fetchall() == [(i,) for i in range(10)] + + def test_connection_from_parquet(self, duckdb_cursor, parquet_file): + assert duckdb_cursor.from_parquet(parquet_file).fetchall() == [(i,) for i in range(10)] + + def test_module_level_with_connection_kwarg(self, duckdb_cursor, parquet_file): + res = duckdb.read_parquet(parquet_file, connection=duckdb_cursor).fetchall() + assert res == [(i,) for i in range(10)] + + # Error handling + + def test_nonexistent_file(self, duckdb_cursor, tmp_path): + missing = tmp_path / "missing.parquet" + with pytest.raises(duckdb.IOException, match=r"missing\.parquet"): + duckdb_cursor.read_parquet(str(missing)).fetchall() + + def test_empty_list(self, duckdb_cursor): + with pytest.raises(duckdb.InvalidInputException, match="non-empty list of paths or file-like objects"): + duckdb_cursor.read_parquet([]) + + def test_filelike_exception(self, duckdb_cursor): + pytest.importorskip("fsspec") + + class ReadError: + def read(self, amount=-1) -> NoReturn: + raise ValueError(amount) + + def seek(self, loc) -> int: + return 0 + + class SeekError: + def read(self, amount=-1) -> bytes: + return b"test" + + def seek(self, loc) -> NoReturn: + raise ValueError(loc) + + # The MemoryFileSystem copies the content with 'read', so this fails instantly + with pytest.raises(ValueError, match="-1"): + duckdb_cursor.read_parquet(ReadError()) + + # 'seek' is never called on the object itself; the copied content is just not valid parquet + with pytest.raises(duckdb.InvalidInputException, match="too small to be a Parquet file"): + duckdb_cursor.read_parquet(SeekError())