forked from wherobots/wherobots-python-dbapi
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
126 lines (99 loc) · 4.08 KB
/
models.py
File metadata and controls
126 lines (99 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from dataclasses import dataclass
from typing import Any, Dict
import pandas
from .constants import DEFAULT_STORAGE_FORMAT
from .types import StorageFormat
@dataclass(frozen=True)
class StoreResult:
    """Result information when a query's results are stored to cloud storage.

    Attributes:
        result_uri: The URI or presigned URL of the stored result.
        size: The size of the stored result in bytes, or None if not available.
    """

    # URI (or presigned URL, when one was requested) of the stored result.
    result_uri: str
    # Byte size of the stored result; None when the server did not report it.
    size: int | None = None
@dataclass
class Store:
    """Configuration for storing query results to cloud storage.

    When passed to cursor.execute(), query results will be written to cloud
    storage instead of being returned directly over the WebSocket connection.

    Attributes:
        format: The storage format (parquet, csv, or geojson). Defaults to parquet.
        single: If True, store as a single file. If False, store as multiple files.
        generate_presigned_url: If True, generate a presigned URL for the result.
            Requires single=True.
        options: Optional dict of format-specific Spark DataFrameWriter options
            (e.g. ``{"header": "false", "delimiter": "|"}`` for CSV). These are
            applied after the server's default options, so they can override them.
            An empty dict is normalized to None.
    """

    # The docstring promises a parquet default, so the field carries one
    # instead of being a required positional argument (backward compatible:
    # existing callers that pass format explicitly are unaffected).
    format: StorageFormat = DEFAULT_STORAGE_FORMAT
    single: bool = False
    generate_presigned_url: bool = False
    options: dict[str, str] | None = None

    def __post_init__(self) -> None:
        """Validate the presigned-URL constraint and normalize ``options``.

        Raises:
            ValueError: If generate_presigned_url is True while single is False.
        """
        if self.generate_presigned_url and not self.single:
            raise ValueError("Presigned URL can only be generated when single=True")
        # Normalize empty options to None; copy defensively so later mutation
        # of the caller's dict cannot affect this configuration.
        self.options = dict(self.options) if self.options else None

    @classmethod
    def for_download(
        cls,
        format: StorageFormat | None = None,
        options: dict[str, str] | None = None,
    ) -> "Store":
        """Create a configuration for downloading results via a presigned URL.

        This is a convenience method that creates a configuration with
        single file mode and presigned URL generation enabled.

        Args:
            format: The storage format; defaults to DEFAULT_STORAGE_FORMAT.
            options: Optional format-specific Spark DataFrameWriter options.

        Returns:
            A Store configured for single-file download with presigned URL.
        """
        return cls(
            format=format or DEFAULT_STORAGE_FORMAT,
            single=True,
            generate_presigned_url=True,
            options=options,
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize this Store to a dict for the WebSocket request.

        Returns a dict suitable for inclusion as the ``"store"`` field in an
        ``execute_sql`` request. The ``options`` key is omitted when there
        are no user-supplied options (backward compatible).
        """
        # Booleans travel as lowercase strings ("true"/"false") on the wire.
        d: dict[str, Any] = {
            "format": self.format.value,
            "single": str(self.single).lower(),
            "generate_presigned_url": str(self.generate_presigned_url).lower(),
        }
        if self.options:
            d["options"] = self.options
        return d
@dataclass
class ExecutionResult:
    """Result of a query execution.

    This class encapsulates all possible outcomes of a query execution:
    a DataFrame result, an error, or a store result (when results are
    written to cloud storage).

    Attributes:
        results: The query results as a pandas DataFrame, or None if an error occurred.
        error: The error that occurred during execution, or None if successful.
        store_result: The store result if results were written to cloud storage.
    """

    # In-memory results; None on error or when results went to cloud storage.
    results: pandas.DataFrame | None = None
    # Exception raised during execution; None on success.
    error: Exception | None = None
    # Populated only when results were written to cloud storage.
    store_result: StoreResult | None = None
@dataclass(frozen=True)
class ProgressInfo:
    """Progress information for a running query.

    Mirrors the ``execution_progress`` event sent by the SQL session.
    """

    # Identifier of the execution this progress report belongs to.
    execution_id: str
    # Total number of tasks reported for the query.
    tasks_total: int
    # Number of tasks that have finished.
    tasks_completed: int
    # Number of tasks currently running.
    tasks_active: int