diff --git a/README.md b/README.md index 2019db3..868c1f1 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,26 @@ else)` evaluates only the branch the condition selects, so `if(n != 0, total/n, needed), but you can shadow any of them by registering a function or env value of the same name. +## Pipelines + +The pipe operator `a | f(x)` compiles as `f(a, x)`, so chained +transformations read left to right instead of inside out: + +```go +p, err := expr.Compile( + `checks | filter(!it.ok) | map(it.name) | join(", ")`, + expr.WithBuiltins(), + expr.WithFunctions(expr.StringFuncs()), +) +``` + +The pipe is compile-time sugar over ordinary calls, so it composes with +everything above: builtins, your registered functions, and the +higher-order forms in both shapes. In Go this token means bitwise or, +which expr has always rejected, so the pipe changed the meaning of no +existing expression. Design rationale lives in +[RFC 0001](docs/rfcs/0001-pipe-operator.md). + ## What it isn't `expr` evaluates a **single expression**. 
No statements, no `:=`, no diff --git a/boundaries1_test.go b/boundaries1_test.go index 791ece8..858d0cb 100644 --- a/boundaries1_test.go +++ b/boundaries1_test.go @@ -1244,7 +1244,7 @@ func TestSyntax_UnsupportedNodesAllReject(t *testing.T) { "*p", // unary * (unsupported token) "&x", // address-of "1 & 2", // bitwise AND - "1 | 2", // bitwise OR + "1 | 2", // pipe with non-call right side "1 ^ 2", // bitwise XOR "1 << 2", // shift left "1 >> 2", // shift right diff --git a/cmd/expr/main.go b/cmd/expr/main.go index 299c736..109302e 100644 --- a/cmd/expr/main.go +++ b/cmd/expr/main.go @@ -32,6 +32,7 @@ func main() { fmt.Fprintf(os.Stderr, " expr -i '{\"user\":{\"age\":36}}' 'user.age >= 18'\n") fmt.Fprintf(os.Stderr, " expr 'user.age >= 18' -i @user.json\n") fmt.Fprintf(os.Stderr, " echo '{\"x\":41}' | expr -i - 'x + 1'\n") + fmt.Fprintf(os.Stderr, " expr -i '{\"xs\":[3,1,2]}' 'xs | filter(it > 1) | len()'\n") } // Reorder arguments so that flags may appear before or after the diff --git a/docs/guides/examples.md b/docs/guides/examples.md index 452ec4c..5d212a9 100644 --- a/docs/guides/examples.md +++ b/docs/guides/examples.md @@ -554,3 +554,50 @@ sort(["banana", "apple"]) // → ["apple", "banana"] `sort` accepts all-numbers or all-strings; mixed types produce `ErrEvaluate`. It never mutates the input and returns a fresh `[]any`. `reverse` works on any list type and also returns a fresh copy. + +--- + +## 13. Pipelines + +`a | f(x)` compiles as `f(a, x)`, so a transformation chain reads +left to right instead of inside out. Rendering an error report: + +```go +events | + filter(it.kind == "error") | + map(sprintf("[%s] %s", it.source, it.message)) | + join("\n") +``` + +(As with every operator, the `|` goes at the end of a continuation +line, never at the start, because Go's semicolon insertion terminates +a line that ends in an identifier or `)`.) 
+ +Env (compile with `WithBuiltins` for `sprintf` and `StringFuncs` for +`join`): + +```go +map[string]any{ + "events": []any{ + map[string]any{"kind": "error", "source": "api", "message": "timeout"}, + map[string]any{"kind": "info", "source": "api", "message": "ok"}, + map[string]any{"kind": "error", "source": "db", "message": "deadlock"}, + }, +} +``` + +Result: + +``` +[api] timeout +[db] deadlock +``` + +The pipe is sugar only: the chain above is exactly +`join(map(filter(events, ...), ...), "\n")` after compilation, and the +named-binding forms compose the same way +(`orders | filter(o, o.paid) | sortBy(o, o.total)`). The right side of +each `|` must be written as a call (`xs | len()`, not `xs | len`). See +the [spec](../reference/spec.md#pipeline-) for precedence rules. In +short: pipe first, compare after (`xs | count(it.ok) == 2` works; +`a == b | f()` is a compile error asking for parentheses). diff --git a/docs/guides/higher-order-patterns.md b/docs/guides/higher-order-patterns.md index 2cca571..06b70bc 100644 --- a/docs/guides/higher-order-patterns.md +++ b/docs/guides/higher-order-patterns.md @@ -149,6 +149,29 @@ map( ) ``` +## Pipelines: the same chains, left to right + +`a | f(x)` compiles as `f(a, x)`, so a filter-then-map chain can read +in execution order instead of inside out: + +``` +orders | filter(it.status == "paid") | map(it.id) +``` + +This is purely compile-time sugar over the nested form above. The +forms, bindings, and laziness rules are identical, and the three-arg +named-binding shape composes the same way: + +``` +orders | filter(o, o.status == "paid") | map(o, {o.id: o.total}) +``` + +The right side of each `|` must be written as a call (`xs | trim()`, +not `xs | trim`), and a pipe on the right-hand side of a comparison +must be parenthesized. Precedence details live in the +[spec](../reference/spec.md#pipeline-); a worked example is in +[examples.md](examples.md#13-pipelines). 
+ ## Nested forms and the `it` rebinding rule Inside a nested two-arg higher-order form, `it` and `index` always refer diff --git a/docs/reference/spec.md b/docs/reference/spec.md index 69bd489..edae42e 100644 --- a/docs/reference/spec.md +++ b/docs/reference/spec.md @@ -64,14 +64,16 @@ Precedence and associativity come from `go/parser`. They match Go: | Precedence | Operators | Associativity | | ---------- | ------------------------------ | ------------- | | 5 (high) | `* / %` | left | -| 4 | `+ -` | left | +| 4 | `+ - \|` | left | | 3 | `== != < <= > >=` | left | | 2 | `&&` | left | | 1 (low) | `\|\|` | left | Unary `!`, `-`, `+` bind tighter than any binary operator. Parentheses -group expressions as usual. Go's bitwise operators (`&`, `|`, `^`, `<<`, -`>>`, `&^`) parse but are rejected at Compile time with `ErrCompile`. +group expressions as usual. The `|` token is the [pipeline +operator](#pipeline-), not bitwise or. Go's remaining bitwise +operators (`&`, `^`, `<<`, `>>`, `&^`) parse but are rejected at +Compile time with `ErrCompile`. ### Arithmetic (`+ - * / %`) @@ -110,6 +112,84 @@ group expressions as usual. Go's bitwise operators (`&`, `|`, `^`, `<<`, and `count || 0` falls back to `0` only when `count` is falsey. Where a strict bool is required, wrap with `bool(...)`. +### Pipeline (`|`) + +`a | f(x, y)` evaluates exactly as `f(a, x, y)`: the left side becomes +the first argument of the call on the right side. The rewrite happens once at Compile time, +so the pipeline has no runtime semantics of its own: a piped call is +the call, including for the higher-order special forms and their lazy +evaluation rules. Pipes chain left to right: + +``` +checks | filter(!it.ok) | map(sprintf("- %s: %s", it.name, it.msg)) | join("\n") +// identical to: +join(map(filter(checks, !it.ok), sprintf("- %s: %s", it.name, it.msg)), "\n") +``` + +The pipe is a deliberate deviation from expr's strict-Go-subset +identity: in Go this token means bitwise or. 
Before v1.2.0, expr +rejected `|` at Compile time as an unsupported bitwise operator, so no +previously-compilable expression changes meaning under the pipeline +reading. The full design rationale lives in +[RFC 0001](../rfcs/0001-pipe-operator.md). + +The right-hand side must be written as a call. Anything else (an +identifier, a selector, an index, an optional access, a parenthesized +expression, a literal) is rejected at Compile time with `ErrCompile`: + +``` +xs | len() // ok: len(xs) +xs | f // ErrCompile: "f" is not a call (did you mean to write f(...)?) +xs | filter // ErrCompile: "filter" is a special form, did you mean + // to write filter(predicate)? +xs | f()[0] // ErrCompile: the right side parses as the index f()[0] +(xs | f())[0] // ok: index the piped result +xs | a?.b // ErrCompile: optional access is not a call +``` + +Because the rewrite is purely syntactic, it composes with every call +shape: the iterating forms (`xs | filter(it > 0)` is +`filter(xs, it > 0)`), the three-arg named-binding forms +(`orders | filter(o, o.paid)`), the lazy forms (`v | try(fallback)` is +`try(v, fallback)`, with `v` still evaluated under try's error +handling), env-provided callables, and selector calls on env values. +`map` and `if` work even though they are Go keywords; the keyword +rewrite runs before parsing. + +Pipe targets follow the same resolution rules as any other call: an +unregistered name compiles (the env may provide the callable at Run) +and fails at evaluation with the usual unknown-function error. + +**Precedence.** `|` keeps Go's precedence: level 4, the same as `+` +and `-`. That is tighter than comparisons, `&&`, and `||`, and looser +than `*`, `/`, `%`, and unary operators. Postfix syntax (calls, `.field`, +`[idx]`, `?.`, `?[`) binds tighter still. 
Consequences: + +``` +xs | count(it > 1) == 2 // (xs | count(it > 1)) == 2: pipe, then compare +n + 1 | double() // (n + 1) | double(): same level, left-assoc +n | double() + 1 // (n | double()) + 1 +ok && xs | any(it > 0) // ok && (xs | any(it > 0)) +x > 2 | if("big", "small") // ErrCompile: ambiguous, see below +(x > 2) | if("big", "small") // parenthesize to pipe a comparison +``` + +One shape is rejected outright rather than silently mis-grouping: a +bare pipe as the **right** operand of a comparison. `a == b | f()` +parses as `a == (b | f())`, with the pipe consuming the comparison's +right operand, so expr fails compilation with an "ambiguous expression" +error demanding parentheses; write `(a | f()) == b` or `a == (b | f())` +to state which one you meant. A pipe on the *left* of a comparison +(`xs | count(it > 1) == 2`) is the useful, unambiguous order and needs +no parentheses. + +**Optional access.** An optional-access result pipes normally +(`user?.name | upper()` is `upper(user?.name)`; a nil receiver pipes +`nil` into the call, which the iterating forms treat as an empty +list). The reverse needs parentheses: `?.` binds tighter than `|`, so +accessing a field on a piped result is written +`(xs | find(it.ok))?.name`. + ### Unary - `!x` is logical negation using truthiness (so `!0` is `true`). 
@@ -743,7 +823,8 @@ Only these `ast.Expr` node kinds are accepted; everything else returns - `*ast.Ident` — identifiers - `*ast.ParenExpr` — `( x )` - `*ast.UnaryExpr` — `!x`, `-x`, `+x` -- `*ast.BinaryExpr` — arithmetic, comparison, logical +- `*ast.BinaryExpr` — arithmetic, comparison, logical, pipeline + (`a | f(x)`, desugared to `f(a, x)` at Compile time) - `*ast.SelectorExpr` — `x.y` - `*ast.IndexExpr` — `x[i]` - `*ast.CallExpr` — `f(a, b, ...)` @@ -760,7 +841,9 @@ with `ErrCompile`): - Function literals (`func() {}`) - Channel ops (`<-ch`, `ch <- v`) - Pointer/address ops (`*x`, `&x`) -- Bitwise operators (`& | ^ << >> &^`) +- Bitwise operators (`& ^ << >> &^`). The `|` token is the + [pipeline operator](#pipeline-); a `|` whose right side is not a + call is rejected at Compile time. - Imaginary number literals (`1i`) - Spread call arguments (`f(xs...)`) - Label and selector type names (`pkg.Type`) diff --git a/docs/rfcs/0001-pipe-operator.md b/docs/rfcs/0001-pipe-operator.md index 0c8b357..4cd5490 100644 --- a/docs/rfcs/0001-pipe-operator.md +++ b/docs/rfcs/0001-pipe-operator.md @@ -1,8 +1,15 @@ # RFC 0001: Pipe Operator (`|`) -**Status:** Draft +**Status:** Implemented (always on) **Date:** 2026-06-12 -**No implementation commitment has been made. This document exists to think the design through.** +**Implementation notes:** The desugar, the non-call right-side errors, +and the ambiguous-comparison diagnostic (§3.3) shipped as recommended. +The opt-in recommendation (§7.3) was overridden: the default-on +question (§9.2 step 5) was resolved in favor of enabling the pipe +unconditionally, since `|` never compiled before and the token reuse +therefore breaks no existing expression. Normative language +documentation lives in [the spec](../reference/spec.md); this document +records the design rationale. 
--- diff --git a/docs_examples_test.go b/docs_examples_test.go index 23ecad7..1913d56 100644 --- a/docs_examples_test.go +++ b/docs_examples_test.go @@ -479,3 +479,40 @@ func TestDocsExample10_GuardedMathAndGroups(t *testing.T) { } assertDeepEqual(t, got, want) } + +// Example 13: Pipelines. +func TestDocsExample13_Pipelines(t *testing.T) { + src := `events | + filter(it.kind == "error") | + map(sprintf("[%s] %s", it.source, it.message)) | + join("\n")` + env := map[string]any{ + "events": []any{ + map[string]any{"kind": "error", "source": "api", "message": "timeout"}, + map[string]any{"kind": "info", "source": "api", "message": "ok"}, + map[string]any{"kind": "error", "source": "db", "message": "deadlock"}, + }, + } + opts := []Option{WithBuiltins(), WithFunctions(StringFuncs())} + got := runDocExample(t, src, env, opts...) + assertDeepEqual(t, got, "[api] timeout\n[db] deadlock") + + // The doc claims the pipe is sugar for the nested call form. + nested := runDocExample(t, + `join(map(filter(events, it.kind == "error"), sprintf("[%s] %s", it.source, it.message)), "\n")`, + env, opts...) + assertDeepEqual(t, got, nested) + + // The doc claims a bare (non-call) right side is a compile error. + if _, err := Compile(`xs | len`, WithBuiltins()); err == nil { + t.Fatal("expected non-call pipe right side to fail compilation") + } + + // The doc claims "pipe first, compare after": the left-of-comparison + // order works, the right-of-comparison order is a compile error. + got = runDocExample(t, `events | count(it.kind == "error") == 2`, env, opts...) 
+ assertDeepEqual(t, got, true) + if _, err := Compile(`a == b | upper()`, opts...); err == nil { + t.Fatal("expected ambiguous pipe-right-of-comparison to fail compilation") + } +} diff --git a/docs_guides_test.go b/docs_guides_test.go index 74ed709..c4963b1 100644 --- a/docs_guides_test.go +++ b/docs_guides_test.go @@ -701,3 +701,33 @@ func TestGuide_Templates_ErrorsReportLineColumn(t *testing.T) { t.Fatalf("expected line:col in error, got %v", rerr) } } + +// --- higher-order-patterns.md: pipelines -------------------------------------- + +func TestGuide_HigherOrder_Pipelines(t *testing.T) { + // higher-order-patterns.md: `a | f(x)` is exactly `f(a, x)`, for + // both the two-arg and named-binding shapes. + orders := []any{ + map[string]any{"id": "a-1", "status": "paid", "total": int64(50)}, + map[string]any{"id": "b-2", "status": "open", "total": int64(10)}, + map[string]any{"id": "c-3", "status": "paid", "total": int64(70)}, + } + env := map[string]any{"orders": orders} + opts := []Option{WithBuiltins()} + + piped := runGuide(t, `orders | filter(it.status == "paid") | map(it.id)`, env, opts...) + nested := runGuide(t, `map(filter(orders, it.status == "paid"), it.id)`, env, opts...) + assertDeepEqual(t, piped, nested) + assertDeepEqual(t, piped, []any{"a-1", "c-3"}) + + named := runGuide(t, `orders | filter(o, o.status == "paid") | map(o, {o.id: o.total})`, env, opts...) + assertDeepEqual(t, named, []any{ + map[string]any{"a-1": int64(50)}, + map[string]any{"c-3": int64(70)}, + }) + + // The guide claims a bare (non-call) right side is a compile error. + if _, err := Compile(`xs | trim`, opts...); err == nil { + t.Fatal("expected non-call pipe right side to fail compilation") + } +} diff --git a/engine.go b/engine.go index 76c07db..af82aa3 100644 --- a/engine.go +++ b/engine.go @@ -2,7 +2,8 @@ // go/parser. 
It accepts the subset of Go expression syntax useful for // conditions, templates, and parameter interpolation: identifiers, // selectors, index expressions, arithmetic, comparisons, logical -// operators, and calls to registered functions. +// operators, calls to registered functions, and pipelines +// (`a | f(x)` compiles as `f(a, x)`). // // expr is intentionally small and adds no external dependencies. // @@ -216,6 +217,13 @@ func compileWithConfig(code string, cfg *compileConfig) (*Program, error) { if err != nil { return nil, fmt.Errorf("%w: %v", ErrCompile, err) } + // Desugar pipe expressions (`a | f(x)` → `f(a, x)`) before + // validation so validate and the evaluator only ever see ordinary + // call nodes. See pipe.go. + node, err = desugarPipes(fset, node) + if err != nil { + return nil, err + } if err := validate(fset, node); err != nil { return nil, err } diff --git a/fuzz_test.go b/fuzz_test.go index 1aa6e46..a927bcf 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -79,6 +79,18 @@ var fuzzCorpus = []string{ "?.x", "a?.", "a?[", + // pipe operator + "state.items | len()", + `state.name | upper() | lower()`, + "state.items | filter(it > 1) | map(it * 2)", + "state.user?.name | upper()", + `"a|b" | upper()`, + "1 | 2", + "state.items | foo", + "state.items | a?.b", + "state.items | (len())", + "state.items | len() > 2", + "1 + 2 | len()", } // fuzzEnv is the environment FuzzEval runs every mutated expression diff --git a/llms.txt b/llms.txt index 5bfe6a6..70fdbda 100644 --- a/llms.txt +++ b/llms.txt @@ -17,9 +17,10 @@ language reference is [`docs/reference/spec.md`](docs/reference/spec.md). no function definitions. An expression takes an environment and produces a value. - **Parsed by `go/parser.ParseExpr`.** The accepted grammar is a strict - subset of Go expression syntax, with two ergonomic extensions: bare - `[...]` becomes `[]any{...}` and bare `{"k": v}` becomes - `map[string]any{"k": v}`. 
+ subset of Go expression syntax, with a few ergonomic extensions: bare + `[...]` becomes `[]any{...}`, bare `{"k": v}` becomes + `map[string]any{"k": v}`, `?.` / `?[` are optional access, and + `a | f(x)` is the pipeline `f(a, x)`. - **Walked, not compiled.** expr visits the `ast.Expr` directly. There is no bytecode, no VM, no optimizer. This is deliberate — the whole library is ~3.9k non-test Go LOC. @@ -28,7 +29,7 @@ language reference is [`docs/reference/spec.md`](docs/reference/spec.md). caps total nodes evaluated per Run so hostile nesting fails deterministically. Bitwise ops, pointer ops, channel ops, spread args, type assertions, and function literals are all rejected at - eval time. + Compile time. - **Loosely typed at runtime.** Integers are `int64`, floats are `float64`, numeric comparisons mix freely, and truthiness covers nil/false/zero/empty-string/empty-collection. @@ -219,6 +220,35 @@ There is no `??` operator. Use `?.` / `?[` with operand-returning `||` for nil-or-fallback, and `try(x, default)` when the LHS may be a meaningful falsey value you want to keep. +## Pipeline operator + +`a | f(x, y)` compiles exactly as `f(a, x, y)`: the left side becomes +the first argument of the right-side call, and chains read left to +right. The rewrite is purely compile-time and composes with every call +shape, including the higher-order forms (both arities) and `try`/`if`: + +``` +checks | filter(!it.ok) | map(it.name) | join(", ") +// identical to: join(map(filter(checks, !it.ok), it.name), ", ") +orders | filter(o, o.paid) | sortBy(o, o.total) +user?.name | upper() // upper(user?.name); nil pipes as nil +value | try("fallback") // try(value, "fallback"), still lazy +``` + +Rules to remember: + +- `|` was rejected as bitwise or before v1.2.0, so the pipe changed + the meaning of no previously-compilable expression. +- The right side must be written as a call: `xs | trim()` works, + `xs | trim` is ErrCompile with a "did you mean trim(...)?" hint. 
+- `|` binds at `+`/`-` precedence: tighter than `==` and `&&`, so + `xs | count(it > 1) == 2` compares the piped count. A bare pipe on + the *right* of a comparison (`a == b | f()`) is an ErrCompile + demanding parentheses, because it would silently parse as + `a == f(b)`. +- Postfix binds tighter: indexing or `?.` on a pipe result needs + parens: `(xs | find(it.ok))?.name`. + ## Custom functions ```go @@ -367,7 +397,8 @@ across multiple lines, every continuation line **must end with an operator, comma, or opening bracket** — never with an identifier, literal, or closing `)` / `]` / `}`. The safe continuations are `&&`, `||`, `+`, `-`, `*`, `/`, `%`, `==`, `!=`, `<`, `<=`, `>`, `>=`, -`,`, `(`, `[`, `{`. Multi-line call arguments can use trailing commas. +`,`, `(`, `[`, `{`, and a trailing `|`. +Multi-line call arguments can use trailing commas. See `docs/guides/examples.md` for worked multi-line expressions. ## Loose semantics worth remembering @@ -436,12 +467,13 @@ expr will not silently leak goroutines to make cancellation look nicer. ## What is *not* supported -These parse but error at eval time, so the "did I use a feature expr -doesn't have?" answer is always readable: +These parse but are rejected at Compile time with `ErrCompile`, so the +"did I use a feature expr doesn't have?" answer is always readable: - Slice expressions (`x[a:b]`, `x[a:b:c]`) - Type assertions (`x.(T)`) -- Bitwise operators (`& | ^ << >> &^`) +- Bitwise operators (`& ^ << >> &^`); `|` is the pipeline operator, + and a `|` whose right side is not a call is rejected - Pointer/address ops (`*x`, `&x`) - Channel ops (`<-ch`, `ch <- v`) - Imaginary literals (`1i`) diff --git a/pipe.go b/pipe.go new file mode 100644 index 0000000..45501aa --- /dev/null +++ b/pipe.go @@ -0,0 +1,215 @@ +package expr + +import ( + "go/ast" + "go/token" + "strings" +) + +// This file implements the pipeline operator (RFC 0001, +// docs/rfcs/0001-pipe-operator.md). 
+//
+// desugarPipes rewrites every pipe expression `a | f(x, y)` in the
+// tree into the call `f(a, x, y)`. The rewrite runs once at Compile
+// time, between parsing and validation, so the evaluator, the
+// identifier collector, and every fast path see only ordinary call
+// nodes — the pipe has no runtime representation at all. `|` was
+// rejected at Compile time as an unsupported bitwise operator before
+// the pipe existed, so repurposing the token changes the meaning of
+// no previously-compilable expression. Special forms compose
+// naturally because they receive the rewritten CallExpr like any
+// other call: `xs | map(it*2)` is exactly `map(xs, it*2)`, including
+// the lazy/per-element evaluation rules.
+//
+// The right-hand side must be a call expression syntactically; any
+// other right side is an ErrCompile (see pipeNonCallErr). A pipe
+// appearing un-parenthesized as the right operand of a comparison is
+// also an ErrCompile: `a == b | f()` parses as `a == (b | f())`,
+// which silently consumes the comparison's right operand, so expr
+// demands parentheses for that one shape (RFC 0001 §3.3).
+//
+// The walk recurses only into the node types validate accepts. A pipe
+// nested inside an unsupported construct (say, a slice expression) is
+// left alone; validate rejects the enclosing construct first, which is
+// the better error.
+func desugarPipes(fset *token.FileSet, node ast.Expr) (ast.Expr, error) {
+	switch n := node.(type) {
+	case *ast.BinaryExpr:
+		if n.Op == token.OR {
+			return desugarPipe(fset, n)
+		}
+		if isComparisonOp(n.Op) {
+			if pipe, ok := n.Y.(*ast.BinaryExpr); ok && pipe.Op == token.OR {
+				return nil, validateErr(fset, pipe.OpPos,
+					"ambiguous expression: | on the right of %s may parse differently than expected; use parentheses to clarify: write (a | f()) %s b or a %s (b | f())",
+					n.Op, n.Op, n.Op)
+			}
+		}
+		x, err := desugarPipes(fset, n.X)
+		if err != nil {
+			return nil, err
+		}
+		y, err := desugarPipes(fset, n.Y)
+		if err != nil {
+			return nil, err
+		}
+		n.X, n.Y = x, y
+		return n, nil
+	case *ast.ParenExpr:
+		x, err := desugarPipes(fset, n.X)
+		if err != nil {
+			return nil, err
+		}
+		n.X = x
+		return n, nil
+	case *ast.UnaryExpr:
+		x, err := desugarPipes(fset, n.X)
+		if err != nil {
+			return nil, err
+		}
+		n.X = x
+		return n, nil
+	case *ast.SelectorExpr:
+		x, err := desugarPipes(fset, n.X)
+		if err != nil {
+			return nil, err
+		}
+		n.X = x
+		return n, nil
+	case *ast.IndexExpr:
+		x, err := desugarPipes(fset, n.X)
+		if err != nil {
+			return nil, err
+		}
+		idx, err := desugarPipes(fset, n.Index)
+		if err != nil {
+			return nil, err
+		}
+		n.X, n.Index = x, idx
+		return n, nil
+	case *ast.CallExpr:
+		fun, err := desugarPipes(fset, n.Fun)
+		if err != nil {
+			return nil, err
+		}
+		n.Fun = fun
+		for i, a := range n.Args {
+			da, err := desugarPipes(fset, a)
+			if err != nil {
+				return nil, err
+			}
+			n.Args[i] = da
+		}
+		return n, nil
+	case *ast.CompositeLit:
+		for i, e := range n.Elts {
+			if kv, ok := e.(*ast.KeyValueExpr); ok {
+				k, err := desugarPipes(fset, kv.Key)
+				if err != nil {
+					return nil, err
+				}
+				v, err := desugarPipes(fset, kv.Value)
+				if err != nil {
+					return nil, err
+				}
+				kv.Key, kv.Value = k, v
+				continue
+			}
+			de, err := desugarPipes(fset, e)
+			if err != nil {
+				return nil, err
+			}
+			n.Elts[i] = de
+		}
+		return n, nil
+	}
+	return node, nil
+}
+
+func 
isComparisonOp(op token.Token) bool { + switch op { + case token.EQL, token.NEQ, token.LSS, token.LEQ, token.GTR, token.GEQ: + return true + } + return false +} + +// desugarPipe rewrites a single `lhs | call(args...)` node. The left +// side desugars first (pipe chains are left-associative, so +// `a | f() | g()` arrives as `(a | f()) | g()` and the recursion +// bottoms out at the leftmost stage), then splices in as the call's +// first argument. The call node is reused so its position info — used +// by validate for any later error — stays intact. +func desugarPipe(fset *token.FileSet, n *ast.BinaryExpr) (ast.Expr, error) { + lhs, err := desugarPipes(fset, n.X) + if err != nil { + return nil, err + } + call, ok := n.Y.(*ast.CallExpr) + if !ok || isOptAccessCall(call) { + // The optaccess pre-parse rewrite turns `a?.b` / `a?[i]` into + // sentinel calls, so without the extra check `xs | a?.b` would + // silently desugar into a three-argument sentinel call. The + // user wrote an optional access, not a call; reject it like + // any other non-call right side. + return nil, pipeNonCallErr(fset, n.OpPos, n.Y) + } + fun, err := desugarPipes(fset, call.Fun) + if err != nil { + return nil, err + } + args := make([]ast.Expr, 0, len(call.Args)+1) + args = append(args, lhs) + for _, a := range call.Args { + da, err := desugarPipes(fset, a) + if err != nil { + return nil, err + } + args = append(args, da) + } + call.Fun = fun + call.Args = args + return call, nil +} + +func isOptAccessCall(call *ast.CallExpr) bool { + id, ok := call.Fun.(*ast.Ident) + return ok && (id.Name == trySelectFormName || id.Name == tryIndexFormName) +} + +// pipeNonCallErr builds the rejection for a non-call right-hand side +// of `|`, following the error taxonomy in RFC 0001 §6. A bare +// identifier earns a "did you mean to write name(...)?" nudge, and an +// identifier naming a special form shows the form's signature with +// the collection argument dropped, since the pipe supplies it. 
+func pipeNonCallErr(fset *token.FileSet, opPos token.Pos, rhs ast.Expr) error { + const base = "pipe operator | requires a function call on the right-hand side" + if id, ok := rhs.(*ast.Ident); ok { + name := displayIdent(id.Name) + for _, f := range userForms { + if f.name == name { + return validateErr(fset, opPos, + "%s; %q is a special form, did you mean to write %s?", + base, name, pipeCallHint(f.callHint)) + } + } + return validateErr(fset, opPos, + "%s; %q is not a call (did you mean to write %s(...)?)", base, name, name) + } + if src := exprDisplayString(rhs); src != "" { + return validateErr(fset, opPos, "%s; %q is not a call", base, src) + } + return validateErr(fset, opPos, "%s", base) +} + +// pipeCallHint rewrites a form's call signature for pipe position by +// dropping the first parameter, which the pipe supplies: +// `filter(xs, predicate)` becomes `filter(predicate)`. +func pipeCallHint(hint string) string { + open := strings.IndexByte(hint, '(') + comma := strings.IndexByte(hint, ',') + if open < 0 || comma < open { + return hint + } + return hint[:open+1] + strings.TrimSpace(hint[comma+1:]) +} diff --git a/pipe_test.go b/pipe_test.go new file mode 100644 index 0000000..b703f4b --- /dev/null +++ b/pipe_test.go @@ -0,0 +1,334 @@ +package expr + +import ( + "context" + "strings" + "testing" + + "github.com/deepnoodle-ai/expr/internal/require" +) + +// The pipe operator `a | f(x)` (RFC 0001) desugars at Compile time +// into `f(a, x)`. These tests pin the rewrite itself, its precedence +// behavior, its composition with the special forms and optional +// access, the compile errors for non-call right sides, and the +// ambiguous-comparison diagnostic. + +func pipeOpts(extra ...Option) []Option { + return append([]Option{WithBuiltins()}, extra...) +} + +func TestPipe_Basic(t *testing.T) { + got, err := evalExpr(t.Context(), `"ada" | upper()`, nil, pipeOpts()...) 
+ require.NoError(t, err) + require.Equal(t, "ADA", got) +} + +func TestPipe_ExtraArgsShiftRight(t *testing.T) { + funcs := map[string]any{ + "add": func(a, b int64) int64 { return a + b }, + } + got, err := evalExpr(t.Context(), `5 | add(3)`, nil, pipeOpts(WithFunctions(funcs))...) + require.NoError(t, err) + require.Equal(t, int64(8), got) +} + +func TestPipe_Chain(t *testing.T) { + env := map[string]any{ + "checks": []any{ + map[string]any{"name": "fmt", "ok": true, "msg": ""}, + map[string]any{"name": "vet", "ok": false, "msg": "shadowed var"}, + map[string]any{"name": "test", "ok": false, "msg": "2 failures"}, + }, + } + got, err := evalExpr(t.Context(), + `checks | filter(!it.ok) | map(sprintf("- %s: %s", it.name, it.msg)) | join("\n")`, + env, pipeOpts(WithFunctions(StringFuncs()))...) + require.NoError(t, err) + require.Equal(t, "- vet: shadowed var\n- test: 2 failures", got) +} + +func TestPipe_IteratingForms(t *testing.T) { + env := map[string]any{"xs": []any{3, 1, 4, 1, 5}} + cases := []struct { + src string + want any + }{ + {`xs | filter(it > 2)`, []any{3, 4, 5}}, + {`xs | map(it * 10)`, []any{int64(30), int64(10), int64(40), int64(10), int64(50)}}, + {`xs | any(it > 4)`, true}, + {`xs | all(it > 0)`, true}, + {`xs | find(it > 3)`, 4}, + {`xs | count(it == 1)`, int64(2)}, + {`xs | sortBy(-it)`, []any{5, 4, 3, 1, 1}}, + {`xs | flatMap([it, it])`, []any{3, 3, 1, 1, 4, 4, 1, 1, 5, 5}}, + } + for _, tc := range cases { + t.Run(tc.src, func(t *testing.T) { + got, err := evalExpr(t.Context(), tc.src, env) + require.NoError(t, err) + require.Equal(t, tc.want, got) + }) + } +} + +func TestPipe_NamedBindingForm(t *testing.T) { + env := map[string]any{ + "orders": []any{ + map[string]any{"id": 1, "paid": true}, + map[string]any{"id": 2, "paid": false}, + map[string]any{"id": 3, "paid": true}, + }, + } + got, err := evalExpr(t.Context(), `orders | filter(o, o.paid) | map(o, o.id)`, + env) + require.NoError(t, err) + require.Equal(t, []any{1, 3}, got) +} + +func 
TestPipe_IntoIfAndTry(t *testing.T) { + // Special forms receive the rewritten CallExpr like any other + // call, so piping into `if` and `try` keeps their lazy semantics. + env := map[string]any{"x": int64(7)} + + // `>` binds looser than `|`, so the comparison needs parens to be + // the piped value. + got, err := evalExpr(t.Context(), `(x > 2) | if("big", "small")`, env) + require.NoError(t, err) + require.Equal(t, "big", got) + + // try(value, default): the failing division is the piped value and + // is still evaluated inside the form's error scope. + got, err = evalExpr(t.Context(), `x / 0 | try(-1)`, env) + require.NoError(t, err) + require.Equal(t, int64(-1), got) +} + +func TestPipe_Precedence(t *testing.T) { + env := map[string]any{ + "xs": []any{1, 2, 3}, + "a": "ADA", + "b": "ada", + "n": int64(4), + } + funcs := map[string]any{ + "double": func(n int64) int64 { return 2 * n }, + } + cases := []struct { + src string + want any + }{ + // `|` binds tighter than comparisons: a pipe on the left is + // the comparison operand. + {`xs | count(it > 1) == 2`, true}, + {`xs | len() > 2`, true}, + // A parenthesized pipe is fine on the right of a comparison + // (the bare spelling is the ambiguity diagnostic, tested + // below). + {`a == (b | upper())`, true}, + // Same precedence as `+`/`-`, left-associative: arithmetic on + // the left becomes part of the piped value. + {`n + 1 | double()`, int64(10)}, + // And a pipe result is an ordinary arithmetic operand. + {`n | double() + 1`, int64(9)}, + // Logical operators bind looser than `|`. + {`true && xs | any(it == 3)`, true}, + {`false || xs | all(it >= 1)`, true}, + } + for _, tc := range cases { + t.Run(tc.src, func(t *testing.T) { + got, err := evalExpr(t.Context(), tc.src, env, pipeOpts(WithFunctions(funcs))...) 
+ require.NoError(t, err) + require.Equal(t, tc.want, got) + }) + } +} + +func TestPipe_AmbiguousComparisonDiagnostic(t *testing.T) { + // RFC 0001 §3.3: a bare pipe as the right operand of a comparison + // silently consumes the comparison's right operand, so it is an + // ErrCompile demanding parentheses. + bad := []struct { + src string + op string + }{ + {`a == b | upper()`, "=="}, + {`len(errors) == 0 | bool()`, "=="}, + {`a != b | upper()`, "!="}, + {`a < b | len()`, "<"}, + {`f(a >= b | len())`, ">="}, + } + for _, tc := range bad { + t.Run(tc.src, func(t *testing.T) { + _, err := Compile(tc.src, pipeOpts()...) + require.Error(t, err) + require.ErrorIs(t, err, ErrCompile) + require.Contains(t, err.Error(), "ambiguous expression: | on the right of "+tc.op) + }) + } + // Both parenthesized spellings the diagnostic suggests compile. + for _, src := range []string{`(a | upper()) == b`, `a == (b | upper())`} { + _, err := Compile(src, pipeOpts()...) + require.NoError(t, err) + } + // A pipe as the LEFT operand of a comparison is the useful, + // unambiguous order and never trips the diagnostic. + _, err := Compile(`xs | count(it > 1) == 2`, pipeOpts()...) + require.NoError(t, err) +} + +func TestPipe_NestedInsideFormBody(t *testing.T) { + env := map[string]any{ + "users": []any{ + map[string]any{"name": "ada", "tags": []any{"a", "b"}}, + map[string]any{"name": "bob", "tags": []any{"c"}}, + }, + } + got, err := evalExpr(t.Context(), + `users | map(it.tags | join("+"))`, + env, WithFunctions(StringFuncs())) + require.NoError(t, err) + require.Equal(t, []any{"a+b", "c"}, got) +} + +func TestPipe_OptionalAccess(t *testing.T) { + env := map[string]any{ + "user": map[string]any{"name": "ada"}, + "ghost": nil, + "xs": []any{ + map[string]any{"name": "ada", "ok": false}, + map[string]any{"name": "bob", "ok": true}, + }, + } + got, err := evalExpr(t.Context(), `user?.name | upper()`, env, pipeOpts()...) 
+ require.NoError(t, err) + require.Equal(t, "ADA", got) + + // RFC 0001 §4.1: a nil optional-access result pipes nil into the + // call; the iterating forms treat nil as an empty list. + got, err = evalExpr(t.Context(), `ghost?.orders | filter(it.paid)`, env, pipeOpts()...) + require.NoError(t, err) + require.Equal(t, []any{}, got) + + // Optional access composes on a parenthesized pipe result. + got, err = evalExpr(t.Context(), `(xs | find(it.ok))?.name`, env) + require.NoError(t, err) + require.Equal(t, "bob", got) + + // ... including when the pipeline found nothing. + got, err = evalExpr(t.Context(), `(xs | find(it.name == "eve"))?.name`, env) + require.NoError(t, err) + require.Nil(t, got) +} + +func TestPipe_EnvCallable(t *testing.T) { + env := map[string]any{ + "x": int64(3), + "triple": func(n int64) int64 { return 3 * n }, + } + got, err := evalExpr(t.Context(), `x | triple()`, env) + require.NoError(t, err) + require.Equal(t, int64(9), got) +} + +func TestPipe_InsideStringLiteralUntouched(t *testing.T) { + got, err := evalExpr(t.Context(), `"a|b" | upper()`, nil, pipeOpts()...) 
+ require.NoError(t, err) + require.Equal(t, "A|B", got) +} + +func TestPipe_NonCallRHSErrors(t *testing.T) { + cases := []struct { + name string + src string + want string // substring required in the error message + }{ + {"literal_rhs", `1 | 2`, `"2" is not a call`}, + {"ident_rhs", `xs | foo`, `"foo" is not a call (did you mean to write foo(...)?)`}, + {"form_ident_rhs", `xs | filter`, `"filter" is a special form, did you mean to write filter(predicate)?`}, + {"form_keyword_rhs", `xs | map`, `"map" is a special form, did you mean to write map(predicate)?`}, + {"form_try_rhs", `xs | try`, `"try" is a special form, did you mean to write try(default)?`}, + {"selector_rhs", `xs | f().name`, `"f().name" is not a call`}, + {"index_rhs", `xs | f()[0]`, `"f()[0]" is not a call`}, + {"paren_rhs", `xs | (f())`, `is not a call`}, + {"chained_bad_stage", `xs | f() | 2`, `"2" is not a call`}, + {"opt_select_rhs", `xs | a?.b`, `"a?.b" is not a call`}, + {"opt_index_rhs", `xs | a?[0]`, `"a?[0]" is not a call`}, + {"nested_in_args", `f(1 | 2)`, `is not a call`}, + {"nested_in_list", `[1 | 2]`, `is not a call`}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := Compile(tc.src, pipeOpts()...) + require.Error(t, err) + require.ErrorIs(t, err, ErrCompile) + require.Contains(t, err.Error(), "pipe operator | requires a function call on the right-hand side") + require.Contains(t, err.Error(), tc.want) + }) + } +} + +func TestPipe_UnknownFunctionErrorsAtRun(t *testing.T) { + // Like any other call, an unregistered pipe target compiles (the + // env may provide the callable at Run) and fails at evaluation. + p, err := Compile(`xs | nosuchfn()`, pipeOpts()...) 
+ require.NoError(t, err) + _, err = p.Run(context.Background(), map[string]any{"xs": []any{1}}) + require.Error(t, err) + require.ErrorIs(t, err, ErrEvaluate) + require.Contains(t, err.Error(), "nosuchfn") +} + +func TestPipe_Identifiers(t *testing.T) { + p, err := Compile(`orders | filter(o, o.paid && o.total < limit) | map(o, o.id)`, + pipeOpts()...) + require.NoError(t, err) + require.Equal(t, []string{"limit", "orders"}, p.Identifiers()) +} + +func TestPipe_Template(t *testing.T) { + tmpl, err := NewTemplate( + "failing: ${checks | filter(!it.ok) | map(it.name) | join(\", \")}", + pipeOpts(WithFunctions(StringFuncs()))...) + require.NoError(t, err) + out, err := tmpl.Render(t.Context(), map[string]any{ + "checks": []any{ + map[string]any{"name": "vet", "ok": false}, + map[string]any{"name": "fmt", "ok": true}, + map[string]any{"name": "test", "ok": false}, + }, + }) + require.NoError(t, err) + require.Equal(t, "failing: vet, test", out) +} + +func TestPipe_SourceUnchanged(t *testing.T) { + src := `xs | filter(it > 0)` + p, err := Compile(src) + require.NoError(t, err) + require.Equal(t, src, p.Source()) +} + +func TestPipe_EquivalentToNestedCalls(t *testing.T) { + env := map[string]any{"xs": []any{5, -2, 9, 0}} + opts := pipeOpts(WithFunctions(StringFuncs())) + piped, err := evalExpr(t.Context(), + `xs | filter(it > 0) | map(string(it)) | join("-")`, env, opts...) + require.NoError(t, err) + nested, err := evalExpr(t.Context(), + `join(map(filter(xs, it > 0), string(it)), "-")`, env, opts...) + require.NoError(t, err) + require.Equal(t, nested, piped) + require.Equal(t, "5-9", piped) +} + +func TestPipe_LongChainStaysWithinDepthLimit(t *testing.T) { + // A pipeline desugars to nested calls, so an absurdly long chain + // must hit MaxEvalDepth as an error, not a stack overflow. + src := `"x"` + strings.Repeat(` | upper()`, MaxEvalDepth+8) + p, err := Compile(src, pipeOpts()...) 
+ require.NoError(t, err) + _, err = p.Run(context.Background(), nil) + require.Error(t, err) + require.Contains(t, err.Error(), "nested too deeply") +} diff --git a/validate.go b/validate.go index f2940f8..fdb2bb9 100644 --- a/validate.go +++ b/validate.go @@ -49,7 +49,15 @@ func validate(fset *token.FileSet, node ast.Expr) error { return err } return validate(fset, n.Y) - case token.AND, token.OR, token.XOR, token.SHL, token.SHR, token.AND_NOT: + case token.OR: + // `|` is the pipe operator; pipe nodes are desugared into + // calls (or rejected with a pipe-specific message) before + // validation ever runs, so a surviving OR node means a + // caller bypassed the rewrite. Repeat the rewrite's own + // rejection so the message matches either way. + return validateErr(fset, n.OpPos, + "pipe operator | requires a function call on the right-hand side") + case token.AND, token.XOR, token.SHL, token.SHR, token.AND_NOT: return validateErr(fset, n.OpPos, "bitwise operator %s is not supported", n.Op) } return validateErr(fset, n.OpPos, "unsupported binary operator %s", n.Op) diff --git a/validate_test.go b/validate_test.go index 49f12e6..5a5d186 100644 --- a/validate_test.go +++ b/validate_test.go @@ -27,9 +27,11 @@ func TestValidate_RejectsAtCompile(t *testing.T) { {"chan_recv", "<-c", "channel receive"}, {"bitwise_not", "^x", "bitwise complement"}, - // Binary (bitwise) + // Binary (bitwise). `|` is the pipe operator, so a non-call + // right side gets the pipe message rather than a bitwise one + // (see pipe_test.go). {"bitwise_and", "1 & 2", "bitwise"}, - {"bitwise_or", "1 | 2", "bitwise"}, + {"pipe_non_call", "1 | 2", "pipe operator | requires a function call"}, {"bitwise_xor", "1 ^ 2", "bitwise"}, {"shift_left", "1 << 2", "bitwise"}, {"shift_right", "1 >> 2", "bitwise"},