-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathexample_arrow_output.py
More file actions
73 lines (63 loc) · 2.73 KB
/
example_arrow_output.py
File metadata and controls
73 lines (63 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3
"""
Example script demonstrating Arrow table output functionality.
This script shows how to use the new output_format parameter to return
PyArrow tables instead of pandas DataFrames, which is particularly useful
for working with GeoArrow geometries.
"""
from wherobots.db import connect
from wherobots.db.constants import ResultsFormat, OutputFormat, GeometryRepresentation
from wherobots.db.runtime import Runtime
from wherobots.db.region import Region
def example_arrow_usage():
    """
    Demonstrate the ``output_format`` option of ``connect`` side by side.

    Two connections are opened with the same settings. The first passes
    ``output_format=OutputFormat.ARROW``; the second omits the argument and
    relies on the default. Each runs the same query and prints the type of
    the object returned by ``fetchall()``.

    Note: this is illustrative code only. The host/API key are placeholders,
    so valid credentials are required to run it against a Wherobots DB
    instance.
    """
    # Settings shared by both examples; only `output_format` differs.
    shared_settings = dict(
        host="api.cloud.wherobots.com",
        api_key="your_api_key",
        runtime=Runtime.TINY,
        results_format=ResultsFormat.ARROW,  # Efficient wire format
        geometry_representation=GeometryRepresentation.WKB,
        region=Region.AWS_US_WEST_2,
    )
    sample_query = "SELECT * FROM buildings LIMIT 1000"

    # Example 1: ask for Arrow tables instead of pandas DataFrames.
    with connect(output_format=OutputFormat.ARROW, **shared_settings) as arrow_conn:
        arrow_cursor = arrow_conn.cursor()
        arrow_cursor.execute(sample_query)
        arrow_table = arrow_cursor.fetchall()

        # With OutputFormat.ARROW the result is expected to be a
        # pyarrow.Table rather than a pandas.DataFrame.
        print(f"Result type: {type(arrow_table)}")
        print(f"Schema: {arrow_table.schema}")
        print(f"Row count: {len(arrow_table)}")

        # The table can be used directly (handy for GeoArrow geometries);
        # call `.to_pandas()` on it only when a DataFrame is really needed.

    # Example 2: default behavior (backwards compatible). `output_format`
    # is left out, so it falls back to OutputFormat.PANDAS.
    with connect(**shared_settings) as pandas_conn:
        pandas_cursor = pandas_conn.cursor()
        pandas_cursor.execute(sample_query)
        frame = pandas_cursor.fetchall()

        # Existing behavior: the result is a pandas.DataFrame.
        print(f"Result type: {type(frame)}")
if __name__ == "__main__":
    # Running the script only prints this informational banner; no
    # connection is opened. To exercise the live example, add a call to
    # example_arrow_usage() here after supplying real credentials.
    banner_lines = (
        "Arrow Table Output Example",
        "=" * 30,
        "This example shows how to use the new output_format parameter.",
        "Uncomment and provide valid credentials to run against Wherobots DB.",
        "",
        "Key benefits of Arrow output:",
        "- More efficient for large datasets",
        "- Native support for GeoArrow geometries",
        "- Better interoperability with Arrow ecosystem",
        "- Zero-copy operations when possible",
    )
    for banner_line in banner_lines:
        print(banner_line)