diff --git a/README.md b/README.md index 6e82d7a..3a45d89 100644 --- a/README.md +++ b/README.md @@ -1520,6 +1520,47 @@ $result->namedBindings; Unregistered columns fall through to value-based inference: `int → Int64`, `float → Float64`, `bool → UInt8`, `null → Nullable(String)`, `DateTimeInterface → DateTime64(3)`, everything else → `String`. Register types via `withParamType($column, $type)` or `withParamTypes($map)` whenever the inference rule doesn't match the column's ClickHouse declaration. The positional `$bindings` array is still exposed on the resulting `Statement` for callers that prefer it. +**Bulk insert** — emit the canonical `INSERT INTO FORMAT ` envelope together with the serialized row payload in a single typed call. The returned `FormattedInsertStatement` exposes `->query` (the envelope) and `->body` (the format-specific payload) so the caller can ship both to ClickHouse's HTTP interface without hand-assembling either side: + +```php +use Utopia\Query\Builder\ClickHouse as Builder; +use Utopia\Query\Builder\ClickHouse\Format; + +$statement = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1, 'event' => 'login', 'time' => '2024-01-01 00:00:00'], + ['id' => 2, 'event' => 'logout', 'time' => '2024-01-01 00:00:05'], + ]); + +// $statement->query +// INSERT INTO `events` (`id`, `event`, `time`) FORMAT JSONEachRow +// +// $statement->body +// {"id":1,"event":"login","time":"2024-01-01 00:00:00"} +// {"id":2,"event":"logout","time":"2024-01-01 00:00:05"} +``` + +Ship the result over the HTTP interface by passing `$statement->query` as the `?query=` parameter and `$statement->body` as the POST body. Columns are derived from the first row's keys; pass an explicit third argument to pin the order or fill missing keys with `null`: + +```php +$statement = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, $rows, ['id', 'event', 'time']); +``` + +The `Format` enum currently supports `Format::JSONEachRow` and `Format::TabSeparated`. JSONEachRow rows are encoded with `JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE` (slashes and non-ASCII are preserved verbatim); TabSeparated escapes `\\`, `\t`, `\n`, `\r` and emits `\N` for `null`. An empty row iterable produces an empty body, which ClickHouse accepts as a zero-row ingest. The iterable is consumed eagerly — pass a generator if you want to defer row construction, but the serialized body is materialized in full before the statement is returned. + +`bulkInsert()` is the recommended entry point for FORMAT-based ingest — it covers the full envelope + body contract in one typed call. The lower-level `insertFormat()` setter pairs with `insert()` for the envelope-only path (returns `body = null`) and is retained for callers that stream the payload separately. Both paths share the same envelope emitter, so the resulting `query` is identical for equivalent inputs: + +```php +$statement = (new Builder()) + ->into('events') + ->insertFormat('JSONEachRow', ['id', 'event', 'time']) + ->insert(); +// $statement->body is null; assemble the payload separately. +``` + **UPDATE** — compiles to `ALTER TABLE ... UPDATE` with mandatory WHERE: ```php diff --git a/src/Query/Builder/ClickHouse.php b/src/Query/Builder/ClickHouse.php index 06fbc1d..9717e02 100644 --- a/src/Query/Builder/ClickHouse.php +++ b/src/Query/Builder/ClickHouse.php @@ -3,6 +3,7 @@ namespace Utopia\Query\Builder; use Utopia\Query\Builder as BaseBuilder; +use Utopia\Query\Builder\ClickHouse\Format; use Utopia\Query\Builder\ClickHouse\FormattedInsertStatement; use Utopia\Query\Builder\Feature\BitwiseAggregates; use Utopia\Query\Builder\Feature\ClickHouse\ApproximateAggregates; @@ -140,11 +141,58 @@ public function hint(string $hint): static } /** - * Declare a ClickHouse FORMAT pragma for the next INSERT. + * Recommended bulk-ingest entry point. Emits the canonical `INSERT INTO + *
[()] FORMAT ` envelope alongside the serialized row + * payload in a single typed call. The returned `FormattedInsertStatement` + * exposes `->query` (envelope, no bindings) and `->body` (format-specific + * payload) so the caller can ship both to ClickHouse's HTTP interface + * without hand-assembling either side. + * + * The target table must be set via `into()` first. Columns are derived + * from the keys of the first row when `$columns` is omitted; pass + * `$columns` explicitly to pin the order when row shapes vary or when + * an empty iterable is passed. An empty iterable produces an empty + * body — ClickHouse accepts this as a zero-row ingest. + * + * @param iterable> $rows + * @param list $columns Optional explicit column ordering. + */ + public function bulkInsert(Format $format, iterable $rows, array $columns = []): FormattedInsertStatement + { + $materialized = []; + foreach ($rows as $row) { + /** @phpstan-ignore function.alreadyNarrowedType */ + if (!\is_array($row)) { + throw new ValidationException('bulkInsert() rows must be associative arrays.'); + } + $materialized[] = $row; + } + + if (empty($columns) && !empty($materialized)) { + $columns = \array_keys($materialized[0]); + } + + $sql = $this->compileFormatInsertEnvelope($format->value, $columns); + + $body = $format->serialize($materialized, empty($columns) ? null : $columns); + + return new FormattedInsertStatement( + $sql, + [], + $columns, + $format->value, + $body, + executor: $this->executor, + ); + } + + /** + * Lower-level setter for the FORMAT envelope. Use `bulkInsert()` for the + * typed entry point; this method is retained for callers that need to + * stream the body payload separately (e.g. piping a pre-serialized stream + * straight into the HTTP request) — the subsequent `insert()` call emits + * the envelope only, with `body = null`. * - * When a format is set, `insert()` emits - * `INSERT INTO \`t\` (\`col1\`, \`col2\`) FORMAT ` with no VALUES. - * The row payload must be streamed into the HTTP body by the caller. * Column names are derived from the most recent `set()` call (values are * ignored). Pass `$columns` to declare them explicitly when no `set()` * call has been made. @@ -163,6 +211,38 @@ public function insertFormat(string $format, array $columns = []): static return $this; } + /** + * Build the shared `INSERT INTO
[()] FORMAT ` + * envelope. Validates the table, validates column names, quotes the + * table identifier, and wraps each column via `resolveAndWrap()`. + * Resets bindings so callers don't accumulate stale values from prior + * builder operations. + * + * @param list $columns + */ + private function compileFormatInsertEnvelope(string $format, array $columns): string + { + $this->bindings = []; + $this->validateTable(); + + foreach ($columns as $col) { + if ($col === '') { + throw new ValidationException('Column names for FORMAT INSERT must be non-empty strings.'); + } + } + + $wrappedColumns = empty($columns) + ? '' + : ' (' . \implode(', ', \array_map( + fn (string $col): string => $this->resolveAndWrap($col), + $columns + )) . ')'; + + return 'INSERT INTO ' . $this->quote($this->table) + . $wrappedColumns + . ' FORMAT ' . $format; + } + /** * @param array $settings */ @@ -618,31 +698,11 @@ public function insert(): Statement return $this->applyNamedTypedBindings(parent::insert()); } - $this->bindings = []; - $this->validateTable(); - $columns = !empty($this->insertFormatColumns) ? $this->insertFormatColumns : (!empty($this->rows) ? \array_keys($this->rows[0]) : []); - if (empty($columns)) { - throw new ValidationException('No columns specified for FORMAT INSERT. Pass columns to insertFormat() or call set() before insert().'); - } - - foreach ($columns as $col) { - if ($col === '') { - throw new ValidationException('Column names for FORMAT INSERT must be non-empty strings.'); - } - } - - $wrappedColumns = \array_map( - fn (string $col): string => $this->resolveAndWrap($col), - $columns - ); - - $sql = 'INSERT INTO ' . $this->quote($this->table) - . ' (' . \implode(', ', $wrappedColumns) . ')' - . ' FORMAT ' . $format; + $sql = $this->compileFormatInsertEnvelope($format, $columns); return new FormattedInsertStatement( $sql, diff --git a/src/Query/Builder/ClickHouse/Format.php b/src/Query/Builder/ClickHouse/Format.php new file mode 100644 index 0000000..c96354b --- /dev/null +++ b/src/Query/Builder/ClickHouse/Format.php @@ -0,0 +1,132 @@ + FORMAT ` envelope. Each case + * knows how to serialize a row iterable into the request body that + * ClickHouse expects for that format. + */ +enum Format: string +{ + case JSONEachRow = 'JSONEachRow'; + case TabSeparated = 'TabSeparated'; + + /** + * Serialize an iterable of associative rows into the body payload for + * this format. An empty iterable yields an empty string — ClickHouse + * accepts an empty body as a zero-row insert. + * + * When `$columns` is null the column ordering is derived from the keys + * of the first row encountered. Subsequent rows are serialized against + * whatever shape they themselves carry — there is no cross-row + * consistency check. The implications differ per format: + * + * - For positional formats (e.g. {@see Format::TabSeparated}) values + * are emitted in row-key order. If later rows reorder their keys the + * columns silently misalign with the envelope's column list. Pass + * `$columns` explicitly whenever row shapes are not guaranteed + * identical, or whenever the format is positional. + * - For named formats (e.g. {@see Format::JSONEachRow}) key ordering + * does not affect correctness because each value is paired with its + * key in the wire format. `$columns` still acts as a projection + * filter: rows missing a listed column receive `null`, and row keys + * outside the list are dropped. + * + * @param iterable> $rows + * @param list|null $columns Optional explicit column ordering. When null, derived from the keys of the first row. + */ + public function serialize(iterable $rows, ?array $columns = null): string + { + return match ($this) { + self::JSONEachRow => $this->serializeJsonEachRow($rows, $columns), + self::TabSeparated => $this->serializeTabSeparated($rows, $columns), + }; + } + + /** + * @param iterable> $rows + * @param list|null $columns + */ + private function serializeJsonEachRow(iterable $rows, ?array $columns): string + { + $lines = []; + foreach ($rows as $row) { + if ($columns !== null) { + $ordered = []; + foreach ($columns as $col) { + $ordered[$col] = $row[$col] ?? null; + } + $row = $ordered; + } + + $lines[] = \json_encode( + (object) $row, + JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE, + ); + } + + return \implode("\n", $lines); + } + + /** + * @param iterable> $rows + * @param list|null $columns + */ + private function serializeTabSeparated(iterable $rows, ?array $columns): string + { + $lines = []; + foreach ($rows as $row) { + $values = []; + + if ($columns === null) { + foreach ($row as $value) { + $values[] = $this->escapeTabSeparatedValue($value); + } + } else { + foreach ($columns as $col) { + $values[] = $this->escapeTabSeparatedValue($row[$col] ?? null); + } + } + + $lines[] = \implode("\t", $values); + } + + return \implode("\n", $lines); + } + + private function escapeTabSeparatedValue(mixed $value): string + { + if ($value === null) { + return '\\N'; + } + + if (\is_bool($value)) { + return $value ? '1' : '0'; + } + + if (\is_int($value) || \is_float($value)) { + return (string) $value; + } + + if (! \is_string($value)) { + if (\is_object($value) && \method_exists($value, '__toString')) { + $value = (string) $value; + } else { + throw new ValidationException('TabSeparated values must be scalar, null, or stringable. Received: ' . \get_debug_type($value)); + } + } + + return \strtr($value, [ + '\\' => '\\\\', + "\t" => '\\t', + "\n" => '\\n', + "\r" => '\\r', + ]); + } +} diff --git a/src/Query/Builder/ClickHouse/FormattedInsertStatement.php b/src/Query/Builder/ClickHouse/FormattedInsertStatement.php index 7b3c079..caa30cf 100644 --- a/src/Query/Builder/ClickHouse/FormattedInsertStatement.php +++ b/src/Query/Builder/ClickHouse/FormattedInsertStatement.php @@ -12,6 +12,7 @@ * @param list $bindings * @param list $columns * @param string $format + * @param ?string $body Serialized payload to ship as the HTTP request body alongside `$query`. Null when only the envelope query was produced (the caller assembles the body separately). * @param bool $readOnly * @param (Closure(Statement): (array|int))|null $executor */ @@ -20,6 +21,7 @@ public function __construct( array $bindings, public array $columns, public string $format, + public ?string $body = null, bool $readOnly = false, ?Closure $executor = null, ) { @@ -34,6 +36,7 @@ public function withExecutor(Closure $executor): self $this->bindings, $this->columns, $this->format, + $this->body, $this->readOnly, $executor, ); diff --git a/src/Query/QuotesIdentifiers.php b/src/Query/QuotesIdentifiers.php index 6a4b7c6..608411d 100644 --- a/src/Query/QuotesIdentifiers.php +++ b/src/Query/QuotesIdentifiers.php @@ -41,4 +41,28 @@ protected function quote(string $identifier): string return \implode('.', $wrapped); } + + /** + * Quote a single identifier without treating dots as qualifier separators. + * + * Use when the identifier is known to be atomic — e.g. a column name in a + * CREATE TABLE definition where the dot is a literal part of the name + * rather than a `schema.table.column` separator. The canonical case is + * ClickHouse's nested-array convention (`meta.key Array(String)`) where + * `meta.key` is a single top-level column whose name contains a dot. + */ + protected function quoteLiteral(string $identifier): string + { + if ($identifier === '*') { + return '*'; + } + + if (\preg_match('/[\x00-\x1f\x7f]/', $identifier) === 1) { + throw new ValidationException('Identifier contains control character'); + } + + return $this->wrapChar + . \str_replace($this->wrapChar, $this->wrapChar . $this->wrapChar, $identifier) + . $this->wrapChar; + } } diff --git a/src/Query/Schema.php b/src/Query/Schema.php index 0604e1c..bf590f9 100644 --- a/src/Query/Schema.php +++ b/src/Query/Schema.php @@ -26,6 +26,8 @@ public function setExecutor(Closure $executor): static abstract protected function quote(string $identifier): string; + abstract protected function quoteLiteral(string $identifier): string; + abstract protected function compileColumnType(Column $column): string; abstract protected function compileAutoIncrement(): string; @@ -51,7 +53,7 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen $columnDefs[] = $def; if ($column->isPrimary) { - $primaryKeys[] = $this->quote($column->name); + $primaryKeys[] = $this->quoteLiteral($column->name); } if ($column->isUnique) { $uniqueColumns[] = $column->name; @@ -72,13 +74,13 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen $columnDefs[] = 'PRIMARY KEY (' . \implode(', ', $primaryKeys) . ')'; } elseif (! empty($table->compositePrimaryKey)) { $columnDefs[] = 'PRIMARY KEY (' - . \implode(', ', \array_map(fn (string $c): string => $this->quote($c), $table->compositePrimaryKey)) + . \implode(', ', \array_map(fn (string $c): string => $this->quoteLiteral($c), $table->compositePrimaryKey)) . ')'; } // Inline UNIQUE constraints for columns marked unique foreach ($uniqueColumns as $col) { - $columnDefs[] = 'UNIQUE (' . $this->quote($col) . ')'; + $columnDefs[] = 'UNIQUE (' . $this->quoteLiteral($col) . ')'; } // Table-level CHECK constraints @@ -105,9 +107,9 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen // Foreign keys foreach ($table->foreignKeys as $fk) { - $def = 'FOREIGN KEY (' . $this->quote($fk->column) . ')' + $def = 'FOREIGN KEY (' . $this->quoteLiteral($fk->column) . ')' . ' REFERENCES ' . $this->quote($fk->refTable) - . ' (' . $this->quote($fk->refColumn) . ')'; + . ' (' . $this->quoteLiteral($fk->refColumn) . ')'; if ($fk->onDelete !== null) { $def .= ' ON DELETE ' . $fk->onDelete->toSql(); } @@ -138,18 +140,18 @@ public function compileAlter(Table $table): Statement $keyword = $column->isModify ? 'MODIFY COLUMN' : 'ADD COLUMN'; $def = $keyword . ' ' . $this->compileColumnDefinition($column); if ($column->after !== null) { - $def .= ' AFTER ' . $this->quote($column->after); + $def .= ' AFTER ' . $this->quoteLiteral($column->after); } $alterations[] = $def; } foreach ($table->renameColumns as $rename) { - $alterations[] = 'RENAME COLUMN ' . $this->quote($rename->from) - . ' TO ' . $this->quote($rename->to); + $alterations[] = 'RENAME COLUMN ' . $this->quoteLiteral($rename->from) + . ' TO ' . $this->quoteLiteral($rename->to); } foreach ($table->dropColumns as $col) { - $alterations[] = 'DROP COLUMN ' . $this->quote($col); + $alterations[] = 'DROP COLUMN ' . $this->quoteLiteral($col); } foreach ($table->indexes as $index) { @@ -168,9 +170,9 @@ public function compileAlter(Table $table): Statement } foreach ($table->foreignKeys as $fk) { - $def = 'ADD FOREIGN KEY (' . $this->quote($fk->column) . ')' + $def = 'ADD FOREIGN KEY (' . $this->quoteLiteral($fk->column) . ')' . ' REFERENCES ' . $this->quote($fk->refTable) - . ' (' . $this->quote($fk->refColumn) . ')'; + . ' (' . $this->quoteLiteral($fk->refColumn) . ')'; if ($fk->onDelete !== null) { $def .= ' ON DELETE ' . $fk->onDelete->toSql(); } @@ -267,7 +269,7 @@ public function dropIndex(string $table, string $name): Statement protected function compileColumnDefinition(Column $column): string { $parts = [ - $this->quote($column->name), + $this->quoteLiteral($column->name), $this->compileColumnType($column), ]; @@ -369,7 +371,7 @@ protected function compileIndexColumns(Schema\Index $index): string $parts = []; foreach ($index->columns as $col) { - $part = $this->quote($col); + $part = $this->quoteLiteral($col); if (isset($index->collations[$col])) { $collation = $index->collations[$col]; diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index adacd7e..013f283 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -144,7 +144,7 @@ protected function compileColumnDefinition(Column $column): string } $parts = [ - $this->quote($column->name), + $this->quoteLiteral($column->name), $this->compileColumnType($column), ]; @@ -190,12 +190,12 @@ public function compileAlter(Table $table): Statement } foreach ($table->renameColumns as $rename) { - $alterations[] = 'RENAME COLUMN ' . $this->quote($rename->from) - . ' TO ' . $this->quote($rename->to); + $alterations[] = 'RENAME COLUMN ' . $this->quoteLiteral($rename->from) + . ' TO ' . $this->quoteLiteral($rename->to); } foreach ($table->dropColumns as $col) { - $alterations[] = 'DROP COLUMN ' . $this->quote($col); + $alterations[] = 'DROP COLUMN ' . $this->quoteLiteral($col); } foreach ($table->dropIndexes as $name) { @@ -246,7 +246,7 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen $columnDefs[] = $def; if ($column->isPrimary) { - $primaryKeys[] = $this->quote($column->name); + $primaryKeys[] = $this->quoteLiteral($column->name); } } @@ -255,7 +255,7 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen } if (empty($primaryKeys) && ! empty($table->compositePrimaryKey)) { - $primaryKeys = \array_map(fn (string $c): string => $this->quote($c), $table->compositePrimaryKey); + $primaryKeys = \array_map(fn (string $c): string => $this->quoteLiteral($c), $table->compositePrimaryKey); } foreach ($table->rawColumnDefs as $rawDef) { @@ -294,7 +294,7 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen $sql .= ' ORDER BY ' . $table->orderByRaw; } else { $orderBy = ! empty($table->orderBy) - ? \array_map(fn (string $c): string => $this->quote($c), $table->orderBy) + ? \array_map(fn (string $c): string => $this->quoteLiteral($c), $table->orderBy) : $primaryKeys; $sql .= ! empty($orderBy) @@ -337,7 +337,7 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen */ private function compileSkipIndex(Index $index): string { - $cols = \array_map(fn (string $c): string => $this->quote($c), $index->columns); + $cols = \array_map(fn (string $c): string => $this->quoteLiteral($c), $index->columns); $expr = \count($cols) === 1 ? $cols[0] : '(' . \implode(', ', $cols) . ')'; if ($index->algorithm === null) { @@ -383,16 +383,16 @@ private function compileEngine(Engine $engine, array $args): string Engine::AggregatingMergeTree => $engine->value . '()', Engine::ReplacingMergeTree => $engine->value . '(' - . (isset($args[0]) ? $this->quote($args[0]) : '') + . (isset($args[0]) ? $this->quoteLiteral($args[0]) : '') . ')', Engine::SummingMergeTree => $engine->value . '(' . (empty($args) ? '' - : \implode(', ', \array_map(fn (string $c): string => $this->quote($c), $args))) + : \implode(', ', \array_map(fn (string $c): string => $this->quoteLiteral($c), $args))) . ')', - Engine::CollapsingMergeTree => $engine->value . '(' . $this->quote($args[0]) . ')', + Engine::CollapsingMergeTree => $engine->value . '(' . $this->quoteLiteral($args[0]) . ')', Engine::ReplicatedMergeTree => $engine->value . "('" . \str_replace("'", "''", $args[0]) . "'" @@ -470,7 +470,7 @@ public function commentOnTable(string $table, string $comment): Statement public function commentOnColumn(string $table, string $column, string $comment): Statement { return new Statement( - 'ALTER TABLE ' . $this->quote($table) . ' COMMENT COLUMN ' . $this->quote($column) . " '" . str_replace(['\\', "'"], ['\\\\', "''"], $comment) . "'", + 'ALTER TABLE ' . $this->quote($table) . ' COMMENT COLUMN ' . $this->quoteLiteral($column) . " '" . str_replace(['\\', "'"], ['\\\\', "''"], $comment) . "'", [], executor: $this->executor, ); diff --git a/src/Query/Schema/MongoDB.php b/src/Query/Schema/MongoDB.php index ee42be3..d3a6a9c 100644 --- a/src/Query/Schema/MongoDB.php +++ b/src/Query/Schema/MongoDB.php @@ -24,6 +24,11 @@ protected function quote(string $identifier): string return $identifier; } + protected function quoteLiteral(string $identifier): string + { + return $identifier; + } + protected function compileColumnType(Column $column): string { if ($column->userTypeName !== null) { diff --git a/src/Query/Schema/MySQL.php b/src/Query/Schema/MySQL.php index 0a4eb6a..b5747de 100644 --- a/src/Query/Schema/MySQL.php +++ b/src/Query/Schema/MySQL.php @@ -101,7 +101,7 @@ public function changeColumn(string $table, string $oldName, string $newName, st { return new Statement( 'ALTER TABLE ' . $this->quote($table) - . ' CHANGE COLUMN ' . $this->quote($oldName) . ' ' . $this->quote($newName) . ' ' . $type, + . ' CHANGE COLUMN ' . $this->quoteLiteral($oldName) . ' ' . $this->quoteLiteral($newName) . ' ' . $type, [], executor: $this->executor, ); @@ -114,7 +114,7 @@ public function modifyColumn(string $table, string $name, string $type): Stateme { return new Statement( 'ALTER TABLE ' . $this->quote($table) - . ' MODIFY ' . $this->quote($name) . ' ' . $type, + . ' MODIFY ' . $this->quoteLiteral($name) . ' ' . $type, [], executor: $this->executor, ); diff --git a/src/Query/Schema/PostgreSQL.php b/src/Query/Schema/PostgreSQL.php index 01b4e6a..616886f 100644 --- a/src/Query/Schema/PostgreSQL.php +++ b/src/Query/Schema/PostgreSQL.php @@ -99,7 +99,7 @@ protected function compileUnsigned(): string protected function compileColumnDefinition(Column $column): string { $parts = [ - $this->quote($column->name), + $this->quoteLiteral($column->name), $this->compileColumnType($column), ]; @@ -149,7 +149,7 @@ protected function compileColumnDefinition(Column $column): string // PostgreSQL enum emulation via CHECK constraint if ($column->type === ColumnType::Enum && ! empty($column->enumValues)) { $values = \array_map(fn (string $v): string => "'" . \str_replace(['\\', "'"], ['\\\\', "''"], $v) . "'", $column->enumValues); - $parts[] = 'CHECK (' . $this->quote($column->name) . ' IN (' . \implode(', ', $values) . '))'; + $parts[] = 'CHECK (' . $this->quoteLiteral($column->name) . ' IN (' . \implode(', ', $values) . '))'; } if ($column->checkExpression !== null) { @@ -352,7 +352,7 @@ public function compileAlter(Table $table): Statement foreach ($table->columns as $column) { $keyword = $column->isModify ? 'ALTER COLUMN' : 'ADD COLUMN'; if ($column->isModify) { - $def = $keyword . ' ' . $this->quote($column->name) + $def = $keyword . ' ' . $this->quoteLiteral($column->name) . ' TYPE ' . $this->compileColumnType($column); } else { $def = $keyword . ' ' . $this->compileColumnDefinition($column); @@ -361,18 +361,18 @@ public function compileAlter(Table $table): Statement } foreach ($table->renameColumns as $rename) { - $alterations[] = 'RENAME COLUMN ' . $this->quote($rename->from) - . ' TO ' . $this->quote($rename->to); + $alterations[] = 'RENAME COLUMN ' . $this->quoteLiteral($rename->from) + . ' TO ' . $this->quoteLiteral($rename->to); } foreach ($table->dropColumns as $col) { - $alterations[] = 'DROP COLUMN ' . $this->quote($col); + $alterations[] = 'DROP COLUMN ' . $this->quoteLiteral($col); } foreach ($table->foreignKeys as $fk) { - $def = 'ADD FOREIGN KEY (' . $this->quote($fk->column) . ')' + $def = 'ADD FOREIGN KEY (' . $this->quoteLiteral($fk->column) . ')' . ' REFERENCES ' . $this->quote($fk->refTable) - . ' (' . $this->quote($fk->refColumn) . ')'; + . ' (' . $this->quoteLiteral($fk->refColumn) . ')'; if ($fk->onDelete !== null) { $def .= ' ON DELETE ' . $fk->onDelete->toSql(); } @@ -500,7 +500,7 @@ public function alterColumnType(string $table, string $column, string $type, str } $sql = 'ALTER TABLE ' . $this->quote($table) - . ' ALTER COLUMN ' . $this->quote($column) + . ' ALTER COLUMN ' . $this->quoteLiteral($column) . ' TYPE ' . $type; if ($using !== '') { @@ -589,7 +589,7 @@ public function commentOnTable(string $table, string $comment): Statement public function commentOnColumn(string $table, string $column, string $comment): Statement { return new Statement( - 'COMMENT ON COLUMN ' . $this->quote($table) . '.' . $this->quote($column) . " IS '" . str_replace(['\\', "'"], ['\\\\', "''"], $comment) . "'", + 'COMMENT ON COLUMN ' . $this->quote($table) . '.' . $this->quoteLiteral($column) . " IS '" . str_replace(['\\', "'"], ['\\\\', "''"], $comment) . "'", [], executor: $this->executor, ); diff --git a/src/Query/Schema/SQLite.php b/src/Query/Schema/SQLite.php index 99ede13..8ed59b3 100644 --- a/src/Query/Schema/SQLite.php +++ b/src/Query/Schema/SQLite.php @@ -62,7 +62,7 @@ protected function compileColumnDefinition(Column $column): string } $parts = [ - $this->quote($column->name), + $this->quoteLiteral($column->name), $this->compileColumnType($column), 'PRIMARY KEY', $this->compileAutoIncrement(), diff --git a/src/Query/Schema/Trait/ForeignKeys.php b/src/Query/Schema/Trait/ForeignKeys.php index 8e60ee2..6fe9cae 100644 --- a/src/Query/Schema/Trait/ForeignKeys.php +++ b/src/Query/Schema/Trait/ForeignKeys.php @@ -18,9 +18,9 @@ public function addForeignKey( ): Statement { $sql = 'ALTER TABLE ' . $this->quote($table) . ' ADD CONSTRAINT ' . $this->quote($name) - . ' FOREIGN KEY (' . $this->quote($column) . ')' + . ' FOREIGN KEY (' . $this->quoteLiteral($column) . ')' . ' REFERENCES ' . $this->quote($refTable) - . ' (' . $this->quote($refColumn) . ')'; + . ' (' . $this->quoteLiteral($refColumn) . ')'; if ($onDelete !== null) { $sql .= ' ON DELETE ' . $onDelete->toSql(); diff --git a/tests/Query/Builder/Feature/ClickHouse/BulkInsertTest.php b/tests/Query/Builder/Feature/ClickHouse/BulkInsertTest.php new file mode 100644 index 0000000..c69d781 --- /dev/null +++ b/tests/Query/Builder/Feature/ClickHouse/BulkInsertTest.php @@ -0,0 +1,399 @@ +into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1, 'event' => 'login', 'time' => '2024-01-01 00:00:00'], + ]); + + $this->assertInstanceOf(FormattedInsertStatement::class, $result); + $this->assertSame( + 'INSERT INTO `events` (`id`, `event`, `time`) FORMAT JSONEachRow', + $result->query + ); + $this->assertSame([], $result->bindings); + $this->assertSame(['id', 'event', 'time'], $result->columns); + $this->assertSame('JSONEachRow', $result->format); + $this->assertSame( + '{"id":1,"event":"login","time":"2024-01-01 00:00:00"}', + $result->body, + ); + } + + public function testBulkInsertMultipleRowsJsonEachRow(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1, 'event' => 'login'], + ['id' => 2, 'event' => 'logout'], + ['id' => 3, 'event' => 'view'], + ]); + + $this->assertSame( + 'INSERT INTO `events` (`id`, `event`) FORMAT JSONEachRow', + $result->query + ); + $this->assertSame( + '{"id":1,"event":"login"}' . "\n" + . '{"id":2,"event":"logout"}' . "\n" + . '{"id":3,"event":"view"}', + $result->body, + ); + $this->assertStringEndsNotWith("\n", (string) $result->body); + } + + public function testBulkInsertEmptyIterableEmitsEmptyBody(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [], ['id', 'event']); + + $this->assertSame( + 'INSERT INTO `events` (`id`, `event`) FORMAT JSONEachRow', + $result->query + ); + $this->assertSame('', $result->body); + $this->assertSame(['id', 'event'], $result->columns); + } + + public function testBulkInsertEmptyIterableWithoutColumnsOmitsColumnList(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, []); + + $this->assertSame( + 'INSERT INTO `events` FORMAT JSONEachRow', + $result->query + ); + $this->assertSame('', $result->body); + $this->assertSame([], $result->columns); + } + + public function testBulkInsertJsonEachRowEscapesSpecialCharacters(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1, 'note' => "tab\there\nnewline\"quote\\back"], + ]); + + $this->assertSame( + '{"id":1,"note":"tab\\there\\nnewline\\"quote\\\\back"}', + $result->body, + ); + } + + public function testBulkInsertJsonEachRowPreservesUnicodeAndSlashes(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['path' => '/api/v1/events', 'label' => 'café — 日本'], + ]); + + $this->assertSame( + '{"path":"/api/v1/events","label":"café — 日本"}', + $result->body, + ); + } + + public function testBulkInsertJsonEachRowSerializesNull(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1, 'note' => null], + ]); + + $this->assertSame('{"id":1,"note":null}', $result->body); + } + + public function testBulkInsertExplicitColumnsPinOrderAndFillMissingKeysWithNull(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert( + Format::JSONEachRow, + [ + ['id' => 1, 'event' => 'login'], + ['event' => 'view', 'id' => 2], + ['id' => 3], + ], + ['id', 'event'], + ); + + $this->assertSame(['id', 'event'], $result->columns); + $this->assertSame( + '{"id":1,"event":"login"}' . "\n" + . '{"id":2,"event":"view"}' . "\n" + . '{"id":3,"event":null}', + $result->body, + ); + } + + public function testBulkInsertPreservesLiteralDotInTableName(): void + { + $result = (new Builder()) + ->into('my.namespace') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1], + ]); + + $this->assertSame( + 'INSERT INTO `my`.`namespace` (`id`) FORMAT JSONEachRow', + $result->query + ); + } + + public function testBulkInsertTabSeparated(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::TabSeparated, [ + ['id' => 1, 'event' => 'login'], + ['id' => 2, 'event' => 'logout'], + ]); + + $this->assertSame( + 'INSERT INTO `events` (`id`, `event`) FORMAT TabSeparated', + $result->query + ); + $this->assertSame( + "1\tlogin\n2\tlogout", + $result->body, + ); + $this->assertSame('TabSeparated', $result->format); + } + + public function testBulkInsertTabSeparatedEscapesControlCharacters(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::TabSeparated, [ + ['id' => 1, 'note' => "a\tb\nc\\d"], + ]); + + $this->assertSame( + "1\ta\\tb\\nc\\\\d", + $result->body, + ); + } + + public function testBulkInsertTabSeparatedRendersNullAsBackslashN(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::TabSeparated, [ + ['id' => 1, 'note' => null], + ]); + + $this->assertSame("1\t\\N", $result->body); + } + + public function testBulkInsertTabSeparatedRendersBooleansAsZeroOne(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::TabSeparated, [ + ['id' => 1, 'active' => true, 'archived' => false], + ]); + + $this->assertSame("1\t1\t0", $result->body); + } + + public function testBulkInsertTabSeparatedRejectsArrayValues(): void + { + $this->expectException(ValidationException::class); + + (new Builder()) + ->into('events') + ->bulkInsert(Format::TabSeparated, [ + ['id' => 1, 'tags' => ['a', 'b']], + ]); + } + + public function testBulkInsertAcceptsGenerator(): void + { + $generator = (function (): iterable { + yield ['id' => 1, 'event' => 'login']; + yield ['id' => 2, 'event' => 'logout']; + })(); + + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, $generator); + + $this->assertSame( + 'INSERT INTO `events` (`id`, `event`) FORMAT JSONEachRow', + $result->query + ); + $this->assertSame( + '{"id":1,"event":"login"}' . "\n" . '{"id":2,"event":"logout"}', + $result->body, + ); + } + + public function testBulkInsertRejectsNonAssociativeRow(): void + { + $this->expectException(ValidationException::class); + + $generator = (function (): iterable { + yield 'not-a-row'; + })(); + + (new Builder()) + ->into('events') + /** @phpstan-ignore argument.type */ + ->bulkInsert(Format::JSONEachRow, $generator); + } + + public function testBulkInsertRejectsEmptyColumnName(): void + { + $this->expectException(ValidationException::class); + + (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [['id' => 1]], ['']); + } + + public function testBulkInsertRequiresTable(): void + { + $this->expectException(ValidationException::class); + + (new Builder()) + ->bulkInsert(Format::JSONEachRow, [['id' => 1]]); + } + + public function testFormattedInsertStatementWithExecutorPreservesBody(): void + { + $result = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [['id' => 1]]); + + $executor = fn (): int => 0; + $rebound = $result->withExecutor($executor); + + $this->assertInstanceOf(FormattedInsertStatement::class, $rebound); + $this->assertSame($result->query, $rebound->query); + $this->assertSame($result->bindings, $rebound->bindings); + $this->assertSame($result->columns, $rebound->columns); + $this->assertSame($result->format, $rebound->format); + $this->assertSame($result->body, $rebound->body); + } + + public function testInsertFormatEnvelopeStillEmitsNullBodyForBackCompat(): void + { + $result = (new Builder()) + ->into('events') + ->insertFormat('JSONEachRow', ['id', 'event']) + ->insert(); + + $this->assertInstanceOf(FormattedInsertStatement::class, $result); + $this->assertNull($result->body); + } + + public function testBulkInsertAndInsertFormatEmitIdenticalEnvelopeForSameInputs(): void + { + $bulk = (new Builder()) + ->into('events') + ->bulkInsert(Format::JSONEachRow, [ + ['id' => 1, 'event' => 'login'], + ]); + + $envelope = (new Builder()) + ->into('events') + ->insertFormat('JSONEachRow', ['id', 'event']) + ->insert(); + + $this->assertInstanceOf(FormattedInsertStatement::class, $envelope); + $this->assertSame($bulk->query, $envelope->query); + $this->assertSame($bulk->columns, $envelope->columns); + $this->assertSame($bulk->format, $envelope->format); + } + + public function testInsertFormatEnvelopeQuotesTableWithLiteralDot(): void + { + $envelope = (new Builder()) + ->into('my.namespace') + ->insertFormat('JSONEachRow', ['id']) + ->insert(); + + $bulk = (new Builder()) + ->into('my.namespace') + ->bulkInsert(Format::JSONEachRow, [['id' => 1]]); + + $this->assertSame( + 'INSERT INTO `my`.`namespace` (`id`) FORMAT JSONEachRow', + $envelope->query, + ); + $this->assertSame($envelope->query, $bulk->query); + } + + public function testInsertFormatRejectsEmptyColumnNameMatchingBulkInsert(): void + { + $this->expectException(ValidationException::class); + + (new Builder()) + ->into('events') + ->insertFormat('JSONEachRow', ['id', '']) + ->insert(); + } + + public function testBulkInsertDoesNotPersistFormatStateOnBuilder(): void + { + $builder = (new Builder()) + ->into('events'); + + $builder->bulkInsert(Format::JSONEachRow, [['id' => 1]]); + + $regular = $builder + ->into('users') + ->set(['name' => 'alice']) + ->insert(); + + $this->assertNotInstanceOf(FormattedInsertStatement::class, $regular); + $this->assertStringNotContainsString('FORMAT', $regular->query); + $this->assertSame( + 'INSERT INTO `users` (`name`) VALUES (?)', + $regular->query, + ); + $this->assertSame(['alice'], $regular->bindings); + } + + public function testFormatSerializeExplicitColumnsPinOrderingAcrossInconsistentRows(): void + { + $rows = [ + ['id' => 1, 'event' => 'login'], + ['event' => 'view', 'id' => 2], + ['id' => 3], + ]; + + $tabSeparated = Format::TabSeparated->serialize($rows, ['id', 'event']); + $this->assertSame( + "1\tlogin\n2\tview\n3\t\\N", + $tabSeparated, + ); + + $jsonEachRow = Format::JSONEachRow->serialize($rows, ['id', 'event']); + $this->assertSame( + '{"id":1,"event":"login"}' . "\n" + . '{"id":2,"event":"view"}' . "\n" + . '{"id":3,"event":null}', + $jsonEachRow, + ); + } +} diff --git a/tests/Query/Builder/Feature/ClickHouse/InsertFormatTest.php b/tests/Query/Builder/Feature/ClickHouse/InsertFormatTest.php index 6e0138c..4ea0221 100644 --- a/tests/Query/Builder/Feature/ClickHouse/InsertFormatTest.php +++ b/tests/Query/Builder/Feature/ClickHouse/InsertFormatTest.php @@ -112,14 +112,20 @@ public function testInsertFormatRejectsEmptyColumnName(): void ->insert(); } - public function testInsertFormatRequiresColumns(): void + public function testInsertFormatWithoutColumnsEmitsBareEnvelope(): void { - $this->expectException(ValidationException::class); - - (new Builder()) + $result = (new Builder()) ->into('events') ->insertFormat('JSONEachRow') ->insert(); + + $this->assertInstanceOf(FormattedInsertStatement::class, $result); + $this->assertSame( + 'INSERT INTO `events` FORMAT JSONEachRow', + $result->query + ); + $this->assertSame([], $result->columns); + $this->assertNull($result->body); } public function testInsertWithoutFormatStillEmitsValues(): void diff --git a/tests/Query/Fixture/QuotesIdentifiersHarness.php b/tests/Query/Fixture/QuotesIdentifiersHarness.php index fd62356..a7d667c 100644 --- a/tests/Query/Fixture/QuotesIdentifiersHarness.php +++ b/tests/Query/Fixture/QuotesIdentifiersHarness.php @@ -11,5 +11,6 @@ final class QuotesIdentifiersHarness { use QuotesIdentifiers { quote as public; + quoteLiteral as public; } } diff --git a/tests/Query/QuotesIdentifiersTest.php b/tests/Query/QuotesIdentifiersTest.php index 36df425..b242a0d 100644 --- a/tests/Query/QuotesIdentifiersTest.php +++ b/tests/Query/QuotesIdentifiersTest.php @@ -4,6 +4,7 @@ use PHPUnit\Framework\TestCase; use Tests\Query\Fixture\QuotesIdentifiersHarness; +use Utopia\Query\Exception\ValidationException; final class QuotesIdentifiersTest extends TestCase { @@ -53,4 +54,35 @@ public function testStarOnlyAllowedBareInFinalSegment(): void { $this->assertSame('`a`.`b`.*', $this->wrapper->quote('a.b.*')); } + + public function testQuoteLiteralPreservesDot(): void + { + $this->assertSame('`meta.key`', $this->wrapper->quoteLiteral('meta.key')); + } + + public function testQuoteLiteralWrapsPlainIdentifier(): void + { + $this->assertSame('`plain_name`', $this->wrapper->quoteLiteral('plain_name')); + } + + public function testQuoteLiteralDoublesWrapChar(): void + { + $this->assertSame('```weird```', $this->wrapper->quoteLiteral('`weird`')); + } + + public function testQuoteLiteralPreservesBareStar(): void + { + $this->assertSame('*', $this->wrapper->quoteLiteral('*')); + } + + public function testQuoteLiteralTreatsTrailingStarAsLiteral(): void + { + $this->assertSame('`users.*`', $this->wrapper->quoteLiteral('users.*')); + } + + public function testQuoteLiteralRejectsControlCharacter(): void + { + $this->expectException(ValidationException::class); + $this->wrapper->quoteLiteral("contains\x00null"); + } } diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 1c9803b..da3e548 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -1311,6 +1311,22 @@ public function testCreateTableArrayColumn(): void ); } + public function testCreateTableArrayWithDottedColumnName(): void + { + $schema = new Schema(); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->array('meta.key', ColumnType::String) + ->array('meta.value', ColumnType::String) + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `meta.key` Array(String), `meta.value` Array(String)) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + public function testCreateTableArrayUnsignedInteger(): void { $schema = new Schema();