Skip to content

Commit f91c47a

Browse files
7ttp and burmecia authored
fix(s3): add delimiter option support for CSV files (supabase#561)
* fix(s3): add delimiter option support for CSV files * feat(s3): add csv delimiter table option --------- Co-authored-by: Bo Lu <[email protected]>
1 parent 63fa3e7 commit f91c47a

6 files changed

Lines changed: 65 additions & 2 deletions

File tree

docs/catalog/s3.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ The following options are available when creating S3 foreign tables:
159159
- `format` - File format, required. `csv`, `jsonl`, or `parquet`
160160
- `has_header` - Whether the CSV file has a header row, optional. `true` or `false`, default is `false`
161161
- `compress` - Compression algorithm, optional. One of `gzip`, `bzip2`, `xz`, `zlib`, default is no compression
162+
- `delimiter` - Field delimiter for CSV files, optional. Must be a single one-byte character such as `,`, `;`, or `|`, or an escape sequence such as `E'\t'` for tab; default is `,`
162163

163164
## Entities
164165

@@ -192,6 +193,25 @@ create foreign table s3.table_csv (
192193
);
193194
```
194195

196+
Using a custom delimiter:
197+
198+
```sql
199+
create foreign table s3.table_tsv (
200+
name text,
201+
sex text,
202+
age text,
203+
height text,
204+
weight text
205+
)
206+
server s3_server
207+
options (
208+
uri 's3://bucket/s3_table.tsv',
209+
format 'csv',
210+
delimiter E'\t', -- Tab-separated values
211+
has_header 'false'
212+
);
213+
```
214+
195215
#### Notes
196216

197217
- All columns must be defined in the foreign table
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"name" "sex" "age" "height" "weight"
2+
"Bert" "M" 42 68 166
3+
"Alex" "M" 41 74 170
4+
"Carl" "M" 32 155

wrappers/src/fdw/s3_fdw/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ This is a foreign data wrapper for [AWS S3](https://aws.amazon.com/s3/). It is d
1010

1111
| Version | Date | Notes |
1212
| ------- | ---------- | ---------------------------------------------------- |
13+
| 0.1.6 | 2026-01-21 | Added `delimiter` foreign table option for CSV files |
1314
| 0.1.5 | 2025-07-25 | Fixed parquet file reading position issue |
1415
| 0.1.4 | 2024-08-20 | Added `path_style_url` server option |
1516
| 0.1.2 | 2023-07-13 | Added fdw stats collection |

wrappers/src/fdw/s3_fdw/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ enum S3FdwError {
2020
#[error("invalid format option: '{0}', it can only be 'csv', 'jsonl' or 'parquet'")]
2121
InvalidFormatOption(String),
2222

23+
#[error("invalid delimiter option: '{0}', it must be exactly one character")]
24+
InvalidDelimiterOption(String),
25+
2326
#[error("invalid compression option: {0}")]
2427
InvalidCompressOption(String),
2528

wrappers/src/fdw/s3_fdw/s3_fdw.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ enum Parser {
2525
}
2626

2727
#[wrappers_fdw(
28-
version = "0.1.5",
28+
version = "0.1.6",
2929
author = "Supabase",
3030
website = "https://github.com/supabase/wrappers/tree/main/wrappers/src/fdw/s3_fdw",
3131
error_type = "S3FdwError"
@@ -37,6 +37,7 @@ pub(crate) struct S3Fdw {
3737
parser: Parser,
3838
tgt_cols: Vec<Column>,
3939
rows_out: i64,
40+
csv_delimiter: u8,
4041

4142
// local string buffer for CSV and JSONL
4243
buf: String,
@@ -86,6 +87,7 @@ impl S3Fdw {
8687
buf.extend(self.buf.as_bytes());
8788
*rdr = csv::ReaderBuilder::new()
8889
.has_headers(false)
90+
.delimiter(self.csv_delimiter)
8991
.from_reader(Cursor::new(buf));
9092
}
9193
Parser::JsonLine(records) => {
@@ -124,6 +126,7 @@ impl ForeignDataWrapper<S3FdwError> for S3Fdw {
124126
parser: Parser::JsonLine(VecDeque::new()),
125127
tgt_cols: Vec::new(),
126128
rows_out: 0,
129+
csv_delimiter: b',',
127130
buf: String::new(),
128131
};
129132

@@ -225,6 +228,14 @@ impl ForeignDataWrapper<S3FdwError> for S3Fdw {
225228

226229
let has_header: bool = options.get("has_header") == Some(&"true".to_string());
227230

231+
if let Some(delimiter) = options.get("delimiter") {
232+
if delimiter.len() == 1 {
233+
self.csv_delimiter = delimiter.as_bytes()[0];
234+
} else {
235+
return Err(S3FdwError::InvalidDelimiterOption(delimiter.to_string()));
236+
}
237+
}
238+
228239
self.tgt_cols = columns.to_vec();
229240

230241
if let Some(client) = &self.client {

wrappers/src/fdw/s3_fdw/tests.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ mod tests {
4141
OPTIONS (
4242
uri 's3://warehouse/test_data.csv',
4343
format 'csv',
44-
has_header 'true'
44+
has_header 'true',
45+
delimiter ','
4546
)
4647
"#,
4748
None,
@@ -178,6 +179,28 @@ mod tests {
178179
)
179180
.unwrap();
180181

182+
c.update(
183+
r#"
184+
CREATE FOREIGN TABLE s3_test_table_tsv (
185+
name text,
186+
sex text,
187+
age text,
188+
height text,
189+
weight text
190+
)
191+
SERVER s3_server
192+
OPTIONS (
193+
uri 's3://warehouse/test_data.tsv',
194+
format 'csv',
195+
has_header 'true',
196+
delimiter E'\t'
197+
)
198+
"#,
199+
None,
200+
&[],
201+
)
202+
.unwrap();
203+
181204
let check_test_table = |table| {
182205
let sql = format!("SELECT * FROM {table} ORDER BY name LIMIT 1");
183206
let results = c
@@ -197,6 +220,7 @@ mod tests {
197220
check_test_table("s3_test_table_csv_gz");
198221
check_test_table("s3_test_table_jsonl");
199222
check_test_table("s3_test_table_jsonl_bz");
223+
check_test_table("s3_test_table_tsv");
200224

201225
let check_parquet_table = |table| {
202226
let sql = format!("SELECT * FROM {table} ORDER BY id LIMIT 1");

0 commit comments

Comments
 (0)