refactor(formula): trust grammar invariants in parser

Refactor the formula parser to assume grammar invariants, replacing Result-based error handling in tree walkers with infallible functions and .expect(GRAMMAR_INVARIANT) calls. This simplification is based on the guarantee that the Pest grammar already validates the input structure. To verify these invariants and ensure correctness, extensive new unit tests and property-based tests using proptest have been added. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf)
2026-04-15 04:32:14 -07:00
parent dba8a5269e
commit 5fbc73269f
3 changed files with 607 additions and 170 deletions
@@ -9,6 +9,11 @@ use super::ast::{AggFunc, BinOp, Expr, Filter, Formula};
 #[grammar = "formula.pest"]
 struct FormulaParser;

+/// Message used by `.expect()` calls on invariants that the grammar
+/// guarantees. If one of these ever panics, the grammar and the tree
+/// walker are out of sync — it's a bug, not a runtime condition.
+const GRAMMAR_INVARIANT: &str = "grammar invariant violated: parser out of sync with formula.pest";
+
 /// Parse a formula string like "Profit = Revenue - Cost"
 /// or "Tax = Revenue * 0.08 WHERE Region = \"East\""
 pub fn parse_formula(raw: &str, target_category: &str) -> Result<Formula> {
@@ -16,221 +21,198 @@ pub fn parse_formula(raw: &str, target_category: &str) -> Result<Formula> {
    let formula_pair = FormulaParser::parse(Rule::formula, input)
        .map_err(|e| anyhow!("{}", e))?
        .next()
-        .ok_or_else(|| anyhow!("empty parse result"))?;
-    build_formula(formula_pair, input, target_category)
+        .expect(GRAMMAR_INVARIANT);
+    Ok(build_formula(formula_pair, input, target_category))
 }

 /// Parse a bare expression (no target, no top-level WHERE clause).
 /// Fails if the input contains trailing tokens after a complete expression.
 pub fn parse_expr(s: &str) -> Result<Expr> {
    let input = s.trim();
-    let expr_pair = FormulaParser::parse(Rule::expr_eoi, input)
+    let expr_eoi_pair = FormulaParser::parse(Rule::expr_eoi, input)
        .map_err(|e| anyhow!("{}", e))?
        .next()
-        .ok_or_else(|| anyhow!("empty parse result"))?
-        .into_inner()
-        .next()
-        .ok_or_else(|| anyhow!("missing expression in expr_eoi"))?;
-    build_expr(expr_pair)
+        .expect(GRAMMAR_INVARIANT);
+    Ok(build_expr(first_inner(expr_eoi_pair)))
 }

 // ---- tree walkers -------------------------------------------------------
+//
+// Every `build_*` function below operates on a pest Pair that has already
+// been validated by the grammar.  Invariants like "a `formula` has exactly
+// one `target`" or "a `comparison` has an lhs, op, and rhs" are guaranteed
+// by pest before the tree walker sees the Pair, so these functions are
+// infallible.  Any `.expect(GRAMMAR_INVARIANT)` in here represents a bug
+// marker — if it ever fires, the grammar and the walker have diverged.

-fn build_formula(pair: Pair<Rule>, raw: &str, target_category: &str) -> Result<Formula> {
+fn build_formula(pair: Pair<Rule>, raw: &str, target_category: &str) -> Formula {
    let mut target = None;
    let mut expr = None;
    let mut filter = None;
    for inner in pair.into_inner() {
-        match inner.as_rule() {
-            Rule::target => target = Some(inner.as_str().trim().to_string()),
-            Rule::expr => expr = Some(build_expr(inner)?),
-            Rule::where_clause => filter = Some(build_filter(inner)?),
-            Rule::EOI => {}
-            r => return Err(anyhow!("unexpected rule in formula: {:?}", r)),
+        let rule = inner.as_rule();
+        if rule == Rule::target {
+            target = Some(inner.as_str().trim().to_string());
+        } else if rule == Rule::expr {
+            expr = Some(build_expr(inner));
+        } else if rule == Rule::where_clause {
+            filter = Some(build_filter(inner));
        }
+        // Rule::EOI and any silent rules are ignored.
    }
-    let target = target.ok_or_else(|| anyhow!("missing target in formula"))?;
-    let expr = expr.ok_or_else(|| anyhow!("missing expression in formula"))?;
-    Ok(Formula::new(raw, target, target_category, expr, filter))
+    Formula::new(
+        raw,
+        target.expect(GRAMMAR_INVARIANT),
+        target_category,
+        expr.expect(GRAMMAR_INVARIANT),
+        filter,
+    )
 }

-fn build_expr(pair: Pair<Rule>) -> Result<Expr> {
+fn build_expr(pair: Pair<Rule>) -> Expr {
    // expr = { add_expr }
-    build_add_expr(first_inner(pair, "expr")?)
+    build_add_expr(first_inner(pair))
 }

-fn build_add_expr(pair: Pair<Rule>) -> Result<Expr> {
-    fold_left_binop(pair, build_mul_expr, |s| match s {
-        "+" => Some(BinOp::Add),
-        "-" => Some(BinOp::Sub),
-        _ => None,
+fn build_add_expr(pair: Pair<Rule>) -> Expr {
+    fold_left_binop(pair, build_mul_expr, |s| {
+        if s == "+" {
+            BinOp::Add
+        } else {
+            // The grammar restricts add_op to "+" | "-".
+            BinOp::Sub
+        }
    })
 }

-fn build_mul_expr(pair: Pair<Rule>) -> Result<Expr> {
-    fold_left_binop(pair, build_pow_expr, |s| match s {
-        "*" => Some(BinOp::Mul),
-        "/" => Some(BinOp::Div),
-        _ => None,
+fn build_mul_expr(pair: Pair<Rule>) -> Expr {
+    fold_left_binop(pair, build_pow_expr, |s| {
+        if s == "*" {
+            BinOp::Mul
+        } else {
+            // The grammar restricts mul_op to "*" | "/".
+            BinOp::Div
+        }
    })
 }

-fn build_pow_expr(pair: Pair<Rule>) -> Result<Expr> {
+fn build_pow_expr(pair: Pair<Rule>) -> Expr {
    // pow_expr = { unary ~ (pow_op ~ unary)? }
    let mut pairs = pair.into_inner();
-    let base_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("empty pow_expr"))?;
-    let base = build_unary(base_pair)?;
+    let base = build_unary(pairs.next().expect(GRAMMAR_INVARIANT));
    match pairs.next() {
-        None => Ok(base),
-        Some(op_pair) => {
-            debug_assert_eq!(op_pair.as_rule(), Rule::pow_op);
-            let exp_pair = pairs
-                .next()
-                .ok_or_else(|| anyhow!("missing exponent in pow_expr"))?;
-            let exp = build_unary(exp_pair)?;
-            Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp)))
+        None => base,
+        Some(_pow_op) => {
+            let exp = build_unary(pairs.next().expect(GRAMMAR_INVARIANT));
+            Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp))
        }
    }
 }

-fn build_unary(pair: Pair<Rule>) -> Result<Expr> {
+fn build_unary(pair: Pair<Rule>) -> Expr {
    // unary = { unary_minus | primary }
-    let inner = first_inner(pair, "unary")?;
-    match inner.as_rule() {
-        Rule::unary_minus => {
-            let prim = first_inner(inner, "unary_minus")?;
-            Ok(Expr::UnaryMinus(Box::new(build_primary(prim)?)))
-        }
-        Rule::primary => build_primary(inner),
-        r => Err(anyhow!("unexpected rule in unary: {:?}", r)),
+    let inner = first_inner(pair);
+    if inner.as_rule() == Rule::unary_minus {
+        Expr::UnaryMinus(Box::new(build_primary(first_inner(inner))))
+    } else {
+        // primary is the only other alternative.
+        build_primary(inner)
    }
 }

-fn build_primary(pair: Pair<Rule>) -> Result<Expr> {
+fn build_primary(pair: Pair<Rule>) -> Expr {
    // primary = { number | agg_call | if_expr | paren_expr | ref_expr }
-    let inner = first_inner(pair, "primary")?;
-    match inner.as_rule() {
-        Rule::number => Ok(Expr::Number(inner.as_str().parse()?)),
-        Rule::agg_call => build_agg_call(inner),
-        Rule::if_expr => build_if_expr(inner),
-        Rule::paren_expr => build_expr(first_inner(inner, "paren_expr")?),
-        Rule::ref_expr => {
-            let id_pair = first_inner(inner, "ref_expr")?;
-            Ok(Expr::Ref(identifier_to_string(id_pair)))
-        }
-        r => Err(anyhow!("unexpected rule in primary: {:?}", r)),
+    let inner = first_inner(pair);
+    let rule = inner.as_rule();
+    if rule == Rule::number {
+        Expr::Number(inner.as_str().parse().expect(GRAMMAR_INVARIANT))
+    } else if rule == Rule::agg_call {
+        build_agg_call(inner)
+    } else if rule == Rule::if_expr {
+        build_if_expr(inner)
+    } else if rule == Rule::paren_expr {
+        build_expr(first_inner(inner))
+    } else {
+        // ref_expr is the only remaining alternative.
+        Expr::Ref(identifier_to_string(first_inner(inner)))
    }
 }

-fn build_agg_call(pair: Pair<Rule>) -> Result<Expr> {
+fn build_agg_call(pair: Pair<Rule>) -> Expr {
    // agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" }
    let mut pairs = pair.into_inner();
-    let func_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing agg_func"))?;
-    let func = parse_agg_func(func_pair.as_str())?;
-    let expr_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing aggregate argument"))?;
-    let inner_expr = build_expr(expr_pair)?;
-    let filter = match pairs.next() {
-        Some(p) if p.as_rule() == Rule::inline_where => Some(build_filter(p)?),
-        _ => None,
-    };
-    Ok(Expr::Agg(func, Box::new(inner_expr), filter))
+    let func = parse_agg_func(pairs.next().expect(GRAMMAR_INVARIANT).as_str());
+    let inner_expr = build_expr(pairs.next().expect(GRAMMAR_INVARIANT));
+    // The only pair after expr (if any) is `inline_where`, so we can
+    // map it directly without checking the rule variant.
+    let filter = pairs.next().map(build_filter);
+    Expr::Agg(func, Box::new(inner_expr), filter)
 }

-fn parse_agg_func(s: &str) -> Result<AggFunc> {
+fn parse_agg_func(s: &str) -> AggFunc {
+    // agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" }
    match s.to_ascii_uppercase().as_str() {
-        "SUM" => Ok(AggFunc::Sum),
-        "AVG" => Ok(AggFunc::Avg),
-        "MIN" => Ok(AggFunc::Min),
-        "MAX" => Ok(AggFunc::Max),
-        "COUNT" => Ok(AggFunc::Count),
-        f => Err(anyhow!("unknown aggregate function: {}", f)),
+        "SUM" => AggFunc::Sum,
+        "AVG" => AggFunc::Avg,
+        "MIN" => AggFunc::Min,
+        "MAX" => AggFunc::Max,
+        // COUNT is the only remaining alternative.
+        _ => AggFunc::Count,
    }
 }

-fn build_if_expr(pair: Pair<Rule>) -> Result<Expr> {
+fn build_if_expr(pair: Pair<Rule>) -> Expr {
    // if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" }
    let mut pairs = pair.into_inner();
-    let cond_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing IF condition"))?;
-    let cond = build_comparison(cond_pair)?;
-    let then_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing IF then-branch"))?;
-    let then_e = build_expr(then_pair)?;
-    let else_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing IF else-branch"))?;
-    let else_e = build_expr(else_pair)?;
-    Ok(Expr::If(
-        Box::new(cond),
-        Box::new(then_e),
-        Box::new(else_e),
-    ))
+    let cond = build_comparison(pairs.next().expect(GRAMMAR_INVARIANT));
+    let then_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT));
+    let else_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT));
+    Expr::If(Box::new(cond), Box::new(then_e), Box::new(else_e))
 }

-fn build_comparison(pair: Pair<Rule>) -> Result<Expr> {
+fn build_comparison(pair: Pair<Rule>) -> Expr {
    // comparison = { expr ~ cmp_op ~ expr }
    let mut pairs = pair.into_inner();
-    let lhs_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing comparison lhs"))?;
-    let lhs = build_expr(lhs_pair)?;
-    let op_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing comparison operator"))?;
-    let op = parse_cmp_op(op_pair.as_str())?;
-    let rhs_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing comparison rhs"))?;
-    let rhs = build_expr(rhs_pair)?;
-    Ok(Expr::BinOp(op, Box::new(lhs), Box::new(rhs)))
+    let lhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT));
+    let op = parse_cmp_op(pairs.next().expect(GRAMMAR_INVARIANT).as_str());
+    let rhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT));
+    Expr::BinOp(op, Box::new(lhs), Box::new(rhs))
 }

-fn parse_cmp_op(s: &str) -> Result<BinOp> {
+fn parse_cmp_op(s: &str) -> BinOp {
+    // cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" }
    match s {
-        "=" => Ok(BinOp::Eq),
-        "!=" => Ok(BinOp::Ne),
-        "<" => Ok(BinOp::Lt),
-        ">" => Ok(BinOp::Gt),
-        "<=" => Ok(BinOp::Le),
-        ">=" => Ok(BinOp::Ge),
-        o => Err(anyhow!("unknown comparison operator: {}", o)),
+        "=" => BinOp::Eq,
+        "!=" => BinOp::Ne,
+        "<" => BinOp::Lt,
+        ">" => BinOp::Gt,
+        "<=" => BinOp::Le,
+        // ">=" is the only remaining alternative.
+        _ => BinOp::Ge,
    }
 }

-fn build_filter(pair: Pair<Rule>) -> Result<Filter> {
+fn build_filter(pair: Pair<Rule>) -> Filter {
    // where_clause / inline_where both have shape:
    //   ^"WHERE" ~ identifier ~ "=" ~ filter_value
    let mut pairs = pair.into_inner();
-    let cat_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing WHERE category"))?;
-    let category = identifier_to_string(cat_pair);
-    let val_pair = pairs
-        .next()
-        .ok_or_else(|| anyhow!("missing WHERE value"))?;
-    let item = filter_value_to_string(val_pair);
-    Ok(Filter { category, item })
+    let category = identifier_to_string(pairs.next().expect(GRAMMAR_INVARIANT));
+    let item = filter_value_to_string(pairs.next().expect(GRAMMAR_INVARIANT));
+    Filter { category, item }
 }

 fn filter_value_to_string(pair: Pair<Rule>) -> String {
    // filter_value = { string | pipe_quoted | bare_ident }
-    let inner = pair
-        .into_inner()
-        .next()
-        .expect("filter_value must have an inner pair");
+    let inner = first_inner(pair);
    let s = inner.as_str();
-    match inner.as_rule() {
-        Rule::string => strip_string_quotes(s),
-        Rule::pipe_quoted => unquote_pipe(s),
-        _ => s.to_string(),
+    let rule = inner.as_rule();
+    if rule == Rule::string {
+        strip_string_quotes(s)
+    } else if rule == Rule::pipe_quoted {
+        unquote_pipe(s)
+    } else {
+        // bare_ident is the only remaining alternative.
+        s.to_string()
    }
 }

@@ -257,8 +239,8 @@ fn strip_string_quotes(s: &str) -> String {
 }

 /// Strip surrounding pipes and apply backslash escapes: `\|` → `|`,
-/// `\\` → `\`, `\n` → newline.  Matches the escape semantics documented
-/// in src/persistence/improv.pest.
+/// `\\` → `\`, `\n` → newline, and any other `\X` is preserved verbatim.
+/// Matches the escape semantics documented in src/persistence/improv.pest.
 fn unquote_pipe(s: &str) -> String {
    debug_assert!(is_pipe_quoted(s));
    let inner = &s[1..s.len() - 1];
@@ -266,15 +248,18 @@ fn unquote_pipe(s: &str) -> String {
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c == '\\' {
-            match chars.next() {
-                Some('|') => out.push('|'),
-                Some('\\') => out.push('\\'),
-                Some('n') => out.push('\n'),
-                Some(other) => {
+            // The grammar rule `"\\" ~ ANY` guarantees that a backslash
+            // inside a pipe-quoted identifier is always followed by another
+            // character, so `chars.next()` cannot be `None` here.
+            let escaped = chars.next().expect(GRAMMAR_INVARIANT);
+            match escaped {
+                '|' => out.push('|'),
+                '\\' => out.push('\\'),
+                'n' => out.push('\n'),
+                other => {
                    out.push('\\');
                    out.push(other);
                }
-                None => out.push('\\'),
            }
        } else {
            out.push(c);
@@ -285,35 +270,27 @@ fn unquote_pipe(s: &str) -> String {

 // ---- small helpers ------------------------------------------------------

-fn first_inner<'a>(pair: Pair<'a, Rule>, ctx: &str) -> Result<Pair<'a, Rule>> {
-    pair.into_inner()
-        .next()
-        .ok_or_else(|| anyhow!("empty rule: {}", ctx))
+fn first_inner(pair: Pair<'_, Rule>) -> Pair<'_, Rule> {
+    pair.into_inner().next().expect(GRAMMAR_INVARIANT)
 }

 /// Fold a left-associative binary-operator rule of the shape
 /// `rule = { child ~ (op ~ child)* }` into a left-leaning BinOp tree.
-fn fold_left_binop<F, M>(pair: Pair<Rule>, mut build_child: F, match_op: M) -> Result<Expr>
+/// The `match_op` closure is infallible — the grammar guarantees the
+/// operator token is one of the expected alternatives.
+fn fold_left_binop<F, M>(pair: Pair<Rule>, mut build_child: F, match_op: M) -> Expr
 where
-    F: FnMut(Pair<Rule>) -> Result<Expr>,
-    M: Fn(&str) -> Option<BinOp>,
+    F: FnMut(Pair<Rule>) -> Expr,
+    M: Fn(&str) -> BinOp,
 {
    let mut pairs = pair.into_inner();
-    let first = pairs
-        .next()
-        .ok_or_else(|| anyhow!("empty binop rule"))?;
-    let mut left = build_child(first)?;
+    let mut left = build_child(pairs.next().expect(GRAMMAR_INVARIANT));
    while let Some(op_pair) = pairs.next() {
-        let op = match_op(op_pair.as_str()).ok_or_else(|| {
-            anyhow!("unexpected operator token: {:?}", op_pair.as_str())
-        })?;
-        let right_pair = pairs
-            .next()
-            .ok_or_else(|| anyhow!("missing rhs for operator"))?;
-        let right = build_child(right_pair)?;
+        let op = match_op(op_pair.as_str());
+        let right = build_child(pairs.next().expect(GRAMMAR_INVARIANT));
        left = Expr::BinOp(op, Box::new(left), Box::new(right));
    }
-    Ok(left)
+    left
 }

 #[cfg(test)]
@@ -636,4 +613,458 @@ mod tests {
            panic!("Expected Ref, got: {:?}", f.expr);
        }
    }
+
+    #[test]
+    fn pipe_quoted_escape_double_backslash() {
+        // \\ inside a pipe-quoted identifier is a literal backslash
+        let f = parse_formula("X = |A\\\\B|", "Cat").unwrap();
+        if let Expr::Ref(ref s) = f.expr {
+            assert_eq!(s, "A\\B");
+        } else {
+            panic!("Expected Ref, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn pipe_quoted_escape_newline() {
+        // \n inside a pipe-quoted identifier is a literal newline
+        let f = parse_formula("X = |A\\nB|", "Cat").unwrap();
+        if let Expr::Ref(ref s) = f.expr {
+            assert_eq!(s, "A\nB");
+        } else {
+            panic!("Expected Ref, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn pipe_quoted_unknown_escape_preserved() {
+        // Any `\X` where X isn't |, \, or n is preserved verbatim as
+        // backslash-plus-character.  The grammar's `"\\" ~ ANY` allows
+        // any following character; we just don't interpret it.
+        let f = parse_formula("X = |A\\zB|", "Cat").unwrap();
+        if let Expr::Ref(ref s) = f.expr {
+            assert_eq!(s, "A\\zB");
+        } else {
+            panic!("Expected Ref, got: {:?}", f.expr);
+        }
+    }
+
+    // ── Operator precedence and associativity ──────────────────────────
+
+    #[test]
+    fn mul_binds_tighter_than_add() {
+        // `A + B * C` must parse as Add(A, Mul(B, C))
+        let f = parse_formula("X = A + B * C", "Cat").unwrap();
+        if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr {
+            assert!(matches!(**lhs, Expr::Ref(_)));
+            assert!(matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _)));
+        } else {
+            panic!("Expected Add with Mul on rhs, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn pow_binds_tighter_than_mul() {
+        // `A * B ^ C` must parse as Mul(A, Pow(B, C))
+        let f = parse_formula("X = A * B ^ C", "Cat").unwrap();
+        if let Expr::BinOp(BinOp::Mul, lhs, rhs) = &f.expr {
+            assert!(matches!(**lhs, Expr::Ref(_)));
+            assert!(matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _)));
+        } else {
+            panic!("Expected Mul with Pow on rhs, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn subtraction_is_left_associative() {
+        // `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C))
+        let f = parse_formula("X = A - B - C", "Cat").unwrap();
+        if let Expr::BinOp(BinOp::Sub, lhs, rhs) = &f.expr {
+            assert!(matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _)));
+            assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C"));
+        } else {
+            panic!("Expected left-associative Sub, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn division_is_left_associative() {
+        // `A / B / C` must parse as Div(Div(A, B), C)
+        let f = parse_formula("X = A / B / C", "Cat").unwrap();
+        if let Expr::BinOp(BinOp::Div, lhs, rhs) = &f.expr {
+            assert!(matches!(**lhs, Expr::BinOp(BinOp::Div, _, _)));
+            assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C"));
+        } else {
+            panic!("Expected left-associative Div, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn unary_minus_before_pow() {
+        // `-A ^ B` — hand-rolled parser parses this as UnaryMinus(Ref(A))
+        // followed by nothing because pow is inside unary's recursion.
+        // My grammar: unary = unary_minus | primary, unary_minus = "-" ~ primary.
+        // So `-A` is UnaryMinus(Ref(A)); `^B` can't attach because pow_expr
+        // sits OUTSIDE unary.  Result: pow_expr(unary_minus(A)) — no pow
+        // because the optional pow_op doesn't have another unary to bind to.
+        // Verify: `-A ^ B` parses to BinOp(Pow, UnaryMinus(A), B).
+        let f = parse_formula("X = -A ^ B", "Cat").unwrap();
+        if let Expr::BinOp(BinOp::Pow, lhs, rhs) = &f.expr {
+            assert!(matches!(**lhs, Expr::UnaryMinus(_)));
+            assert!(matches!(**rhs, Expr::Ref(ref s) if s == "B"));
+        } else {
+            panic!("Expected Pow(UnaryMinus, Ref), got: {:?}", f.expr);
+        }
+    }
+
+    // ── Number literal variants ────────────────────────────────────────
+
+    #[test]
+    fn integer_literal() {
+        let f = parse_formula("X = 42", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Number(n) if (n - 42.0).abs() < 1e-10));
+    }
+
+    #[test]
+    fn zero_literal() {
+        let f = parse_formula("X = 0", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Number(n) if n == 0.0));
+    }
+
+    #[test]
+    fn decimal_literal_without_integer_part() {
+        let f = parse_formula("X = .5", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Number(n) if (n - 0.5).abs() < 1e-10));
+    }
+
+    #[test]
+    fn decimal_literal_with_trailing_dot() {
+        let f = parse_formula("X = 5.", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Number(n) if (n - 5.0).abs() < 1e-10));
+    }
+
+    #[test]
+    fn decimal_literal_with_integer_and_fraction() {
+        let f = parse_formula("X = 123.456", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Number(n) if (n - 123.456).abs() < 1e-10));
+    }
+
+    // ── Filter value variants ──────────────────────────────────────────
+
+    #[test]
+    fn where_with_bare_identifier_value() {
+        // filter_value = { string | pipe_quoted | bare_ident } — exercise
+        // the bare_ident branch.
+        let f = parse_formula("X = Revenue WHERE Region = East", "Cat").unwrap();
+        let filter = f.filter.as_ref().unwrap();
+        assert_eq!(filter.category, "Region");
+        assert_eq!(filter.item, "East");
+    }
+
+    // ── Nested constructs ──────────────────────────────────────────────
+
+    #[test]
+    fn nested_sum_aggregate() {
+        // Nested aggregates — outer SUM wrapping an inner SUM.
+        let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap();
+        if let Expr::Agg(AggFunc::Sum, outer_inner, None) = &f.expr {
+            assert!(matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None)));
+        } else {
+            panic!("Expected nested SUM aggregates, got: {:?}", f.expr);
+        }
+    }
+
+    #[test]
+    fn deeply_nested_parens() {
+        // Parens should flatten away without affecting the AST.
+        let f = parse_formula("X = (((((A + B)))))", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _)));
+    }
+
+    #[test]
+    fn nested_if_expression() {
+        // IF in the then-branch of another IF.
+        let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap();
+        if let Expr::If(_, then_e, else_e) = &f.expr {
+            assert!(matches!(**then_e, Expr::If(_, _, _)));
+            assert!(matches!(**else_e, Expr::Number(n) if n == 3.0));
+        } else {
+            panic!("Expected outer IF, got: {:?}", f.expr);
+        }
+    }
+
+    // ── Whitespace tolerance ───────────────────────────────────────────
+
+    #[test]
+    fn tolerates_tabs_between_tokens() {
+        let f = parse_formula("X\t=\tA\t+\tB", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _)));
+    }
+
+    #[test]
+    fn tolerates_extra_spaces_between_tokens() {
+        let f = parse_formula("X    =    A    +    B", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _)));
+    }
+
+    #[test]
+    fn tolerates_leading_and_trailing_whitespace() {
+        let f = parse_formula("   X = A + B   ", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _)));
+    }
+
+    // ── Case insensitivity of keywords ─────────────────────────────────
+
+    #[test]
+    fn aggregate_function_is_case_insensitive() {
+        // The grammar uses ^"SUM" which is case-insensitive.
+        let f = parse_formula("X = sum(Revenue)", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _)));
+
+        let f = parse_formula("X = Sum(Revenue)", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _)));
+    }
+
+    #[test]
+    fn if_keyword_is_case_insensitive() {
+        let f = parse_formula("X = if(A > B, 1, 0)", "Cat").unwrap();
+        assert!(matches!(f.expr, Expr::If(_, _, _)));
+    }
+
+    #[test]
+    fn where_keyword_is_case_insensitive() {
+        let f = parse_formula("X = Revenue where Region = \"East\"", "Cat").unwrap();
+        assert!(f.filter.is_some());
+    }
+
+    // ── Target variants ────────────────────────────────────────────────
+
+    #[test]
+    fn target_with_underscore_and_hyphen() {
+        let f = parse_formula("my_target-name = A", "Cat").unwrap();
+        assert_eq!(f.target, "my_target-name");
+    }
+
+    #[test]
+    fn pipe_quoted_target_preserves_pipes() {
+        let f = parse_formula("|My Target| = A", "Cat").unwrap();
+        assert_eq!(f.target, "|My Target|");
+    }
+}
+
+// ---- grammar-driven proptests ------------------------------------------
+//
+// Uses pest_meta to read formula.pest at test time and walks the grammar
+// AST to generate random valid formulas.  Mirrors the pattern used by the
+// persistence parser tests in src/persistence/mod.rs.
+
+#[cfg(test)]
+mod generator {
+    use pest_meta::ast::{Expr, RuleType};
+    use pest_meta::parser;
+    use proptest::prelude::*;
+    use std::collections::HashMap;
+
+    /// Parse formula.pest and return rules keyed by name.
+    fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
+        let grammar = include_str!("formula.pest");
+        let pairs = parser::parse(parser::Rule::grammar_rules, grammar)
+            .unwrap_or_else(|e| panic!("Bad grammar: {e}"));
+        let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
+        rules
+            .into_iter()
+            .map(|r| (r.name.clone(), (r.ty, r.expr)))
+            .collect()
+    }
+
+    /// Recursive string generator driven by a pest `Expr`.  `choices` is
+    /// consumed left-to-right at every decision point; the `atomic` flag
+    /// controls whether sequences insert a space between their children,
+    /// so that non-atomic grammar rules produce whitespace-separated
+    /// tokens that pest's implicit WHITESPACE handling accepts.
+    pub struct Gen<'g> {
+        rules: &'g HashMap<String, (RuleType, Expr)>,
+        choices: Vec<u8>,
+        pos: usize,
+    }
+
+    impl<'g> Gen<'g> {
+        pub fn new(rules: &'g HashMap<String, (RuleType, Expr)>, choices: Vec<u8>) -> Self {
+            Self {
+                rules,
+                choices,
+                pos: 0,
+            }
+        }
+
+        fn pick(&mut self) -> u8 {
+            let v = self.choices.get(self.pos).copied().unwrap_or(0);
+            self.pos += 1;
+            v
+        }
+
+        fn emit(&mut self, expr: &Expr, out: &mut String, atomic: bool) {
+            match expr {
+                Expr::Str(s) => out.push_str(s),
+                Expr::Insens(s) => out.push_str(s),
+                Expr::Range(lo, hi) => {
+                    let lo = lo.chars().next().unwrap() as u32;
+                    let hi = hi.chars().next().unwrap() as u32;
+                    let range = hi - lo + 1;
+                    let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
+                    out.push(ch);
+                }
+                Expr::Ident(name) => self.emit_ident(name, out, atomic),
+                Expr::Seq(a, b) => {
+                    self.emit(a, out, atomic);
+                    if !atomic {
+                        out.push(' ');
+                    }
+                    self.emit(b, out, atomic);
+                }
+                Expr::Choice(a, b) => {
+                    let mut alts: Vec<&Expr> = vec![a.as_ref()];
+                    let mut cur = b.as_ref();
+                    while let Expr::Choice(l, r) = cur {
+                        alts.push(l.as_ref());
+                        cur = r.as_ref();
+                    }
+                    alts.push(cur);
+                    let idx = self.pick() as usize % alts.len();
+                    self.emit(alts[idx], out, atomic);
+                }
+                Expr::Opt(inner) => {
+                    // ~66% chance of emitting
+                    if !self.pick().is_multiple_of(3) {
+                        self.emit(inner, out, atomic);
+                    }
+                }
+                Expr::Rep(inner) => {
+                    let count = self.pick() % 4;
+                    for i in 0..count {
+                        if i > 0 && !atomic {
+                            out.push(' ');
+                        }
+                        self.emit(inner, out, atomic);
+                    }
+                }
+                Expr::RepOnce(inner) => {
+                    let count = 1 + self.pick() % 3;
+                    for i in 0..count {
+                        if i > 0 && !atomic {
+                            out.push(' ');
+                        }
+                        self.emit(inner, out, atomic);
+                    }
+                }
+                Expr::NegPred(_) | Expr::PosPred(_) => {
+                    // Lookaheads don't emit output.
+                }
+                _ => {
+                    // Skip unsupported expressions.
+                }
+            }
+        }
+
+        fn emit_ident(&mut self, name: &str, out: &mut String, atomic: bool) {
+            match name {
+                "ANY" | "ASCII_ALPHA" => {
+                    out.push((b'a' + self.pick() % 26) as char);
+                }
+                "ASCII_ALPHANUMERIC" => {
+                    if self.pick().is_multiple_of(2) {
+                        out.push((b'a' + self.pick() % 26) as char);
+                    } else {
+                        out.push((b'0' + self.pick() % 10) as char);
+                    }
+                }
+                "ASCII_DIGIT" => {
+                    out.push((b'0' + self.pick() % 10) as char);
+                }
+                "NEWLINE" => out.push('\n'),
+                "SOI" | "EOI" => {}
+                _ => {
+                    if let Some((ty, inner)) = self.rules.get(name).cloned() {
+                        let inner_atomic = atomic
+                            || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic);
+                        self.emit(&inner, out, inner_atomic);
+                    }
+                }
+            }
+        }
+
+        pub fn generate(&mut self, rule_name: &str) -> String {
+            let mut out = String::new();
+            if let Some((ty, expr)) = self.rules.get(rule_name).cloned() {
+                let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic);
+                self.emit(&expr, &mut out, atomic);
+            }
+            out
+        }
+    }
+
+    /// Proptest strategy: generate a random valid formula by walking the
+    /// grammar's `formula` rule.
+    pub fn formula_string() -> impl Strategy<Value = String> {
+        prop::collection::vec(any::<u8>(), 32..=128).prop_map(|choices| {
+            let rules = load_grammar();
+            Gen::new(&rules, choices).generate("formula")
+        })
+    }
+
+    /// Proptest strategy: generate a random valid standalone expression.
+    pub fn expr_string() -> impl Strategy<Value = String> {
+        prop::collection::vec(any::<u8>(), 32..=128).prop_map(|choices| {
+            let rules = load_grammar();
+            Gen::new(&rules, choices).generate("expr_eoi")
+        })
+    }
+}
+
+#[cfg(test)]
+mod grammar_prop_tests {
+    use super::generator;
+    use super::{parse_expr, parse_formula};
+    use proptest::prelude::*;
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(256))]
+
+        /// Every generator-produced formula parses without error.
+        #[test]
+        fn generated_formula_parses(formula in generator::formula_string()) {
+            let result = parse_formula(&formula, "Cat");
+            prop_assert!(
+                result.is_ok(),
+                "Generated formula failed to parse:\n{}\nError: {}",
+                formula,
+                result.unwrap_err()
+            );
+        }
+
+        /// Every generator-produced standalone expression parses.
+        #[test]
+        fn generated_expr_parses(expr in generator::expr_string()) {
+            let result = parse_expr(&expr);
+            prop_assert!(
+                result.is_ok(),
+                "Generated expression failed to parse:\n{}\nError: {}",
+                expr,
+                result.unwrap_err()
+            );
+        }
+
+        /// Parsing the same input twice is deterministic — the debug
+        /// representation of the resulting AST is identical.
+        #[test]
+        fn parse_is_deterministic(formula in generator::formula_string()) {
+            let r1 = parse_formula(&formula, "Cat");
+            let r2 = parse_formula(&formula, "Cat");
+            prop_assume!(r1.is_ok() && r2.is_ok());
+            let f1 = r1.unwrap();
+            let f2 = r2.unwrap();
+            prop_assert_eq!(&f1.target, &f2.target);
+            prop_assert_eq!(format!("{:?}", f1.expr), format!("{:?}", f2.expr));
+        }
+    }
 }