use anyhow::{Result, anyhow}; use pest::Parser as _; use pest::iterators::Pair; use pest_derive::Parser; use super::ast::{AggFunc, BinOp, Expr, Filter, Formula}; #[derive(Parser)] #[grammar = "formula.pest"] struct FormulaParser; /// Message used by `.expect()` calls on invariants that the grammar /// guarantees. If one of these ever panics, the grammar and the tree /// walker are out of sync — it's a bug, not a runtime condition. const GRAMMAR_INVARIANT: &str = "grammar invariant violated: parser out of sync with formula.pest"; /// Parse a formula string like "Profit = Revenue - Cost" /// or "Tax = Revenue * 0.08 WHERE Region = \"East\"" pub fn parse_formula(raw: &str, target_category: &str) -> Result { let input = raw.trim(); let formula_pair = FormulaParser::parse(Rule::formula, input) .map_err(|e| anyhow!("{}", e))? .next() .expect(GRAMMAR_INVARIANT); Ok(build_formula(formula_pair, input, target_category)) } /// Parse a bare expression (no target, no top-level WHERE clause). /// Fails if the input contains trailing tokens after a complete expression. pub fn parse_expr(s: &str) -> Result { let input = s.trim(); let expr_eoi_pair = FormulaParser::parse(Rule::expr_eoi, input) .map_err(|e| anyhow!("{}", e))? .next() .expect(GRAMMAR_INVARIANT); Ok(build_expr(first_inner(expr_eoi_pair))) } // ---- tree walkers ------------------------------------------------------- // // Every `build_*` function below operates on a pest Pair that has already // been validated by the grammar. Invariants like "a `formula` has exactly // one `target`" or "a `comparison` has an lhs, op, and rhs" are guaranteed // by pest before the tree walker sees the Pair, so these functions are // infallible. Any `.expect(GRAMMAR_INVARIANT)` in here represents a bug // marker — if it ever fires, the grammar and the walker have diverged. fn build_formula(pair: Pair, raw: &str, target_category: &str) -> Formula { let mut target = None; let mut expr = None; let mut filter = None; for inner in pair.into_inner() { let rule = inner.as_rule(); if rule == Rule::target { target = Some(inner.as_str().trim().to_string()); } else if rule == Rule::expr { expr = Some(build_expr(inner)); } else if rule == Rule::where_clause { filter = Some(build_filter(inner)); } // Rule::EOI and any silent rules are ignored. } Formula::new( raw, target.expect(GRAMMAR_INVARIANT), target_category, expr.expect(GRAMMAR_INVARIANT), filter, ) } fn build_expr(pair: Pair) -> Expr { // expr = { add_expr } build_add_expr(first_inner(pair)) } fn build_add_expr(pair: Pair) -> Expr { fold_left_binop(pair, build_mul_expr, |s| { if s == "+" { BinOp::Add } else { // The grammar restricts add_op to "+" | "-". BinOp::Sub } }) } fn build_mul_expr(pair: Pair) -> Expr { fold_left_binop(pair, build_pow_expr, |s| { if s == "*" { BinOp::Mul } else { // The grammar restricts mul_op to "*" | "/". BinOp::Div } }) } fn build_pow_expr(pair: Pair) -> Expr { // pow_expr = { unary ~ (pow_op ~ unary)? } let mut pairs = pair.into_inner(); let base = build_unary(pairs.next().expect(GRAMMAR_INVARIANT)); match pairs.next() { None => base, Some(_pow_op) => { let exp = build_unary(pairs.next().expect(GRAMMAR_INVARIANT)); Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp)) } } } fn build_unary(pair: Pair) -> Expr { // unary = { unary_minus | primary } let inner = first_inner(pair); if inner.as_rule() == Rule::unary_minus { Expr::UnaryMinus(Box::new(build_primary(first_inner(inner)))) } else { // primary is the only other alternative. build_primary(inner) } } fn build_primary(pair: Pair) -> Expr { // primary = { number | agg_call | if_expr | paren_expr | ref_expr } let inner = first_inner(pair); let rule = inner.as_rule(); if rule == Rule::number { Expr::Number(inner.as_str().parse().expect(GRAMMAR_INVARIANT)) } else if rule == Rule::agg_call { build_agg_call(inner) } else if rule == Rule::if_expr { build_if_expr(inner) } else if rule == Rule::paren_expr { build_expr(first_inner(inner)) } else { // ref_expr is the only remaining alternative. Expr::Ref(identifier_to_string(first_inner(inner))) } } fn build_agg_call(pair: Pair) -> Expr { // agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" } let mut pairs = pair.into_inner(); let func = parse_agg_func(pairs.next().expect(GRAMMAR_INVARIANT).as_str()); let inner_expr = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); // The only pair after expr (if any) is `inline_where`, so we can // map it directly without checking the rule variant. let filter = pairs.next().map(build_filter); Expr::Agg(func, Box::new(inner_expr), filter) } fn parse_agg_func(s: &str) -> AggFunc { // agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" } match s.to_ascii_uppercase().as_str() { "SUM" => AggFunc::Sum, "AVG" => AggFunc::Avg, "MIN" => AggFunc::Min, "MAX" => AggFunc::Max, // COUNT is the only remaining alternative. _ => AggFunc::Count, } } fn build_if_expr(pair: Pair) -> Expr { // if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" } let mut pairs = pair.into_inner(); let cond = build_comparison(pairs.next().expect(GRAMMAR_INVARIANT)); let then_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); let else_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); Expr::If(Box::new(cond), Box::new(then_e), Box::new(else_e)) } fn build_comparison(pair: Pair) -> Expr { // comparison = { expr ~ cmp_op ~ expr } let mut pairs = pair.into_inner(); let lhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); let op = parse_cmp_op(pairs.next().expect(GRAMMAR_INVARIANT).as_str()); let rhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); Expr::BinOp(op, Box::new(lhs), Box::new(rhs)) } fn parse_cmp_op(s: &str) -> BinOp { // cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" } match s { "=" => BinOp::Eq, "!=" => BinOp::Ne, "<" => BinOp::Lt, ">" => BinOp::Gt, "<=" => BinOp::Le, // ">=" is the only remaining alternative. _ => BinOp::Ge, } } fn build_filter(pair: Pair) -> Filter { // where_clause / inline_where both have shape: // ^"WHERE" ~ identifier ~ "=" ~ filter_value let mut pairs = pair.into_inner(); let category = identifier_to_string(pairs.next().expect(GRAMMAR_INVARIANT)); let item = filter_value_to_string(pairs.next().expect(GRAMMAR_INVARIANT)); Filter { category, item } } fn filter_value_to_string(pair: Pair) -> String { // filter_value = { string | pipe_quoted | bare_ident } let inner = first_inner(pair); let s = inner.as_str(); let rule = inner.as_rule(); if rule == Rule::string { strip_string_quotes(s) } else if rule == Rule::pipe_quoted { unquote_pipe(s) } else { // bare_ident is the only remaining alternative. s.to_string() } } /// Convert an identifier pair (identifier, pipe_quoted, or bare_ident) to /// its content string. Pipe-quoted identifiers have their delimiters /// stripped and backslash escapes applied; bare identifiers are returned /// verbatim. fn identifier_to_string(pair: Pair) -> String { let s = pair.as_str(); if is_pipe_quoted(s) { unquote_pipe(s) } else { s.to_string() } } fn is_pipe_quoted(s: &str) -> bool { s.len() >= 2 && s.starts_with('|') && s.ends_with('|') } fn strip_string_quotes(s: &str) -> String { debug_assert!(s.len() >= 2 && s.starts_with('"') && s.ends_with('"')); s[1..s.len() - 1].to_string() } /// Strip surrounding pipes and apply backslash escapes: `\|` → `|`, /// `\\` → `\`, `\n` → newline, and any other `\X` is preserved verbatim. /// Matches the escape semantics documented in src/persistence/improv.pest. fn unquote_pipe(s: &str) -> String { debug_assert!(is_pipe_quoted(s)); let inner = &s[1..s.len() - 1]; let mut out = String::with_capacity(inner.len()); let mut chars = inner.chars(); while let Some(c) = chars.next() { if c == '\\' { // The grammar rule `"\\" ~ ANY` guarantees that a backslash // inside a pipe-quoted identifier is always followed by another // character, so `chars.next()` cannot be `None` here. let escaped = chars.next().expect(GRAMMAR_INVARIANT); match escaped { '|' => out.push('|'), '\\' => out.push('\\'), 'n' => out.push('\n'), other => { out.push('\\'); out.push(other); } } } else { out.push(c); } } out } // ---- small helpers ------------------------------------------------------ fn first_inner(pair: Pair<'_, Rule>) -> Pair<'_, Rule> { pair.into_inner().next().expect(GRAMMAR_INVARIANT) } /// Fold a left-associative binary-operator rule of the shape /// `rule = { child ~ (op ~ child)* }` into a left-leaning BinOp tree. /// The `match_op` closure is infallible — the grammar guarantees the /// operator token is one of the expected alternatives. fn fold_left_binop(pair: Pair, mut build_child: F, match_op: M) -> Expr where F: FnMut(Pair) -> Expr, M: Fn(&str) -> BinOp, { let mut pairs = pair.into_inner(); let mut left = build_child(pairs.next().expect(GRAMMAR_INVARIANT)); while let Some(op_pair) = pairs.next() { let op = match_op(op_pair.as_str()); let right = build_child(pairs.next().expect(GRAMMAR_INVARIANT)); left = Expr::BinOp(op, Box::new(left), Box::new(right)); } left } #[cfg(test)] mod tests { use super::parse_formula; use crate::{AggFunc, BinOp, Expr}; #[test] fn parse_simple_subtraction() { let f = parse_formula("Profit = Revenue - Cost", "Foo").unwrap(); assert_eq!(f.target, "Profit"); assert_eq!(f.target_category, "Foo"); assert!(matches!(f.expr, Expr::BinOp(BinOp::Sub, _, _))); } #[test] fn parse_where_clause() { let f = parse_formula("EastRev = Revenue WHERE Region = \"East\"", "Foo").unwrap(); assert_eq!(f.target, "EastRev"); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.category, "Region"); assert_eq!(filter.item, "East"); } #[test] fn parse_sum_aggregation() { let f = parse_formula("Total = SUM(Revenue)", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); } #[test] fn parse_avg_aggregation() { let f = parse_formula("Avg = AVG(Revenue)", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Avg, _, _))); } #[test] fn parse_if_expression() { let f = parse_formula("Capped = IF(Revenue > 1000, 1000, Revenue)", "Foo").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); } #[test] fn parse_numeric_literal() { let f = parse_formula("Fixed = 42", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Number(n) if (n - 42.0).abs() < 1e-10)); } #[test] fn parse_chained_arithmetic() { parse_formula("X = (A + B) * (C - D)", "Cat").unwrap(); } #[test] fn parse_missing_equals_returns_error() { assert!(parse_formula("BadFormula Revenue Cost", "Cat").is_err()); } // ── Aggregate functions ───────────────────────────────────────────── #[test] fn parse_min_aggregation() { let f = parse_formula("Lo = MIN(Revenue)", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Min, _, _))); } #[test] fn parse_max_aggregation() { let f = parse_formula("Hi = MAX(Revenue)", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Max, _, _))); } #[test] fn parse_count_aggregation() { let f = parse_formula("N = COUNT(Revenue)", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Count, _, _))); } // ── Aggregate with WHERE filter ───────────────────────────────────── #[test] fn parse_sum_with_top_level_where_works() { let f = parse_formula("EastTotal = SUM(Revenue) WHERE Region = \"East\"", "Foo").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.category, "Region"); assert_eq!(filter.item, "East"); } /// Regression: WHERE inside aggregate parens must parse as the /// aggregate's inline filter, not as a top-level WHERE clause. #[test] fn parse_sum_with_inline_where_filter() { let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap(); assert!(matches!( &f.expr, Expr::Agg(AggFunc::Sum, inner, Some(filter)) if matches!(**inner, Expr::Ref(_)) && filter.category == "Region" && filter.item == "East" )); } // ── Comparison operators ──────────────────────────────────────────── #[test] fn parse_if_with_comparison_operators() { let f = parse_formula("X = IF(A != 0, A, 1)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); let f = parse_formula("X = IF(A < 10, A, 10)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); let f = parse_formula("X = IF(A <= 10, A, 10)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); let f = parse_formula("X = IF(A >= 10, 10, A)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); let f = parse_formula("X = IF(A = B, 1, 0)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); } // ── Quoted strings in WHERE ───────────────────────────────────────── #[test] fn parse_where_with_quoted_string_inside_expression() { let f = parse_formula("X = Revenue WHERE Region = \"West Coast\"", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "West Coast"); } // ── Power operator ────────────────────────────────────────────────── #[test] fn parse_power_operator() { let f = parse_formula("Sq = X ^ 2", "Cat").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Pow, _, _))); } // ── Unary minus ───────────────────────────────────────────────────── #[test] fn parse_unary_minus() { let f = parse_formula("Neg = -Revenue", "Foo").unwrap(); assert!(matches!(f.expr, Expr::UnaryMinus(_))); } // ── Division and multiplication ───────────────────────────────────── #[test] fn parse_multiplication() { let f = parse_formula("Double = Revenue * 2", "Foo").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Mul, _, _))); } #[test] fn parse_division() { let f = parse_formula("Half = Revenue / 2", "Foo").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Div, _, _))); } // ── Parenthesized expression ──────────────────────────────────────── #[test] fn parse_nested_parens() { let f = parse_formula("X = ((A + B))", "Cat").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); } // ── Aggregate function name used as ref (no parens) ───────────────── #[test] fn parse_aggregate_name_without_parens_is_ref() { // "SUM" without parens should be treated as a reference, not a function let f = parse_formula("X = SUM + 1", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, _) if matches!(**lhs, Expr::Ref(_)) )); } #[test] fn parse_if_without_parens_is_ref() { // "IF" without parens should be treated as a reference let f = parse_formula("X = IF + 1", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, _) if matches!(**lhs, Expr::Ref(_)) )); } // ── Quoted string in WHERE ────────────────────────────────────────── #[test] fn parse_quoted_string_in_where() { let f = parse_formula("X = Revenue WHERE Region = \"East\"", "Cat").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "East"); } // ── Error paths ───────────────────────────────────────────────────── #[test] fn parse_unexpected_token_error() { use super::parse_expr; // Extra tokens after a valid expression assert!(parse_expr("1 + 2 3").is_err()); } #[test] fn parse_unexpected_character_error() { use super::parse_expr; assert!(parse_expr("@invalid").is_err()); } #[test] fn parse_empty_expression_error() { use super::parse_expr; assert!(parse_expr("").is_err()); } // ── Multi-word identifiers must be pipe-quoted ────────────────────── #[test] fn multi_word_bare_identifier_is_rejected() { // Multi-word identifiers must be pipe-quoted; bare multi-word fails // the `bare_name`-compatible grammar rule. assert!(parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").is_err()); } // ── WHERE inside quotes in the expression ─────────────────────────── #[test] fn where_inside_quotes_is_not_a_keyword() { // A filter value containing the literal text "WHERE" is parsed as // a string, not as a nested WHERE keyword. let f = parse_formula("X = Revenue WHERE Region = \"WHERE\"", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "WHERE"); } // ── Pipe-quoted identifiers ───────────────────────────────────────── #[test] fn pipe_quoted_identifier_in_expression() { let f = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Foo").unwrap(); assert_eq!(f.target, "|Total Revenue|"); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, rhs) if matches!(**lhs, Expr::Ref(ref s) if s == "Base Revenue") && matches!(**rhs, Expr::Ref(ref s) if s == "Bonus") )); } #[test] fn pipe_quoted_keyword_as_identifier() { // A category named "WHERE" can be referenced with pipes let f = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, rhs) if matches!(**lhs, Expr::Ref(ref s) if s == "WHERE") && matches!(**rhs, Expr::Ref(ref s) if s == "SUM") )); } #[test] fn pipe_quoted_identifier_with_special_chars() { // Pipes allow characters that would normally break tokenization let f = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, rhs) if matches!(**lhs, Expr::Ref(ref s) if s == "Revenue (USD)") && matches!(**rhs, Expr::Ref(ref s) if s == "Cost + Tax") )); } #[test] fn pipe_quoted_in_aggregate() { let f = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::Agg(AggFunc::Sum, inner, None) if matches!(**inner, Expr::Ref(ref s) if s == "Net Revenue") )); } #[test] fn pipe_quoted_in_where_filter_value() { let f = parse_formula("X = Revenue WHERE Region = |East Coast|", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "East Coast"); } #[test] fn pipe_quoted_in_inline_where() { let f = parse_formula("X = SUM(Revenue WHERE |Region Name| = |East Coast|)", "Foo").unwrap(); assert!(matches!( &f.expr, Expr::Agg(AggFunc::Sum, _, Some(filter)) if filter.category == "Region Name" && filter.item == "East Coast" )); } // ── Pipe-quoted escape semantics ──────────────────────────────────── #[test] fn pipe_quoted_escape_literal_pipe() { // \| inside a pipe-quoted identifier is a literal pipe let f = parse_formula("X = |A\\|B|", "Cat").unwrap(); assert!(matches!(&f.expr, Expr::Ref(s) if s == "A|B")); } #[test] fn pipe_quoted_escape_double_backslash() { // \\ inside a pipe-quoted identifier is a literal backslash let f = parse_formula("X = |A\\\\B|", "Cat").unwrap(); assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\\B")); } #[test] fn pipe_quoted_escape_newline() { // \n inside a pipe-quoted identifier is a literal newline let f = parse_formula("X = |A\\nB|", "Cat").unwrap(); assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\nB")); } #[test] fn pipe_quoted_unknown_escape_preserved() { // Any `\X` where X isn't |, \, or n is preserved verbatim as // backslash-plus-character. The grammar's `"\\" ~ ANY` allows // any following character; we just don't interpret it. let f = parse_formula("X = |A\\zB|", "Cat").unwrap(); assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\\zB")); } // ── Operator precedence and associativity ────────────────────────── #[test] fn mul_binds_tighter_than_add() { // `A + B * C` must parse as Add(A, Mul(B, C)) let f = parse_formula("X = A + B * C", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, rhs) if matches!(**lhs, Expr::Ref(_)) && matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _)) )); } #[test] fn pow_binds_tighter_than_mul() { // `A * B ^ C` must parse as Mul(A, Pow(B, C)) let f = parse_formula("X = A * B ^ C", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Mul, lhs, rhs) if matches!(**lhs, Expr::Ref(_)) && matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _)) )); } #[test] fn subtraction_is_left_associative() { // `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C)) let f = parse_formula("X = A - B - C", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Sub, lhs, rhs) if matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _)) && matches!(**rhs, Expr::Ref(ref s) if s == "C") )); } #[test] fn division_is_left_associative() { // `A / B / C` must parse as Div(Div(A, B), C) let f = parse_formula("X = A / B / C", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Div, lhs, rhs) if matches!(**lhs, Expr::BinOp(BinOp::Div, _, _)) && matches!(**rhs, Expr::Ref(ref s) if s == "C") )); } #[test] fn unary_minus_before_pow() { // `-A ^ B` must parse as Pow(UnaryMinus(A), B). The `-` binds // through `unary_minus = { "-" ~ primary }`, producing a unary // node that is then used as the pow_expr's base. let f = parse_formula("X = -A ^ B", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Pow, lhs, rhs) if matches!(**lhs, Expr::UnaryMinus(_)) && matches!(**rhs, Expr::Ref(ref s) if s == "B") )); } // ── Number literal variants ──────────────────────────────────────── #[test] fn integer_literal() { let f = parse_formula("X = 42", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Number(n) if (n - 42.0).abs() < 1e-10)); } #[test] fn zero_literal() { let f = parse_formula("X = 0", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Number(n) if n == 0.0)); } #[test] fn decimal_literal_without_integer_part() { let f = parse_formula("X = .5", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Number(n) if (n - 0.5).abs() < 1e-10)); } #[test] fn decimal_literal_with_trailing_dot() { let f = parse_formula("X = 5.", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Number(n) if (n - 5.0).abs() < 1e-10)); } #[test] fn decimal_literal_with_integer_and_fraction() { let f = parse_formula("X = 123.456", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Number(n) if (n - 123.456).abs() < 1e-10)); } // ── Filter value variants ────────────────────────────────────────── #[test] fn where_with_bare_identifier_value() { // filter_value = { string | pipe_quoted | bare_ident } — exercise // the bare_ident branch. let f = parse_formula("X = Revenue WHERE Region = East", "Cat").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.category, "Region"); assert_eq!(filter.item, "East"); } // ── Nested constructs ────────────────────────────────────────────── #[test] fn nested_sum_aggregate() { // Nested aggregates — outer SUM wrapping an inner SUM. let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::Agg(AggFunc::Sum, outer_inner, None) if matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None)) )); } #[test] fn deeply_nested_parens() { // Parens should flatten away without affecting the AST. let f = parse_formula("X = (((((A + B)))))", "Cat").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); } #[test] fn nested_if_expression() { // IF in the then-branch of another IF. let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::If(_, then_e, else_e) if matches!(**then_e, Expr::If(_, _, _)) && matches!(**else_e, Expr::Number(n) if n == 3.0) )); } // ── Whitespace tolerance ─────────────────────────────────────────── #[test] fn tolerates_tabs_between_tokens() { let f = parse_formula("X\t=\tA\t+\tB", "Cat").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); } #[test] fn tolerates_extra_spaces_between_tokens() { let f = parse_formula("X = A + B", "Cat").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); } #[test] fn tolerates_leading_and_trailing_whitespace() { let f = parse_formula(" X = A + B ", "Cat").unwrap(); assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); } // ── Case insensitivity of keywords ───────────────────────────────── #[test] fn aggregate_function_is_case_insensitive() { // The grammar uses ^"SUM" which is case-insensitive. let f = parse_formula("X = sum(Revenue)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); let f = parse_formula("X = Sum(Revenue)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); } #[test] fn if_keyword_is_case_insensitive() { let f = parse_formula("X = if(A > B, 1, 0)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); } #[test] fn where_keyword_is_case_insensitive() { let f = parse_formula("X = Revenue where Region = \"East\"", "Cat").unwrap(); assert!(f.filter.is_some()); } // ── Target variants ──────────────────────────────────────────────── #[test] fn target_with_underscore_and_hyphen() { let f = parse_formula("my_target-name = A", "Cat").unwrap(); assert_eq!(f.target, "my_target-name"); } /// Regression: the hand-rolled tokenizer didn't allow `-` inside bare /// identifiers, so references to persistence-legal names like /// `east-coast` were silently parsed as `east - coast` (a subtraction /// of two unrelated Refs). The pest grammar mirrors improv.pest's /// `bare_name`, which does allow `-`, so a hyphenated name now parses /// as a single identifier. #[test] fn hyphen_in_bare_identifier_reference() { let f = parse_formula("Total = east-coast + west-coast", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::BinOp(BinOp::Add, lhs, rhs) if matches!(**lhs, Expr::Ref(ref s) if s == "east-coast") && matches!(**rhs, Expr::Ref(ref s) if s == "west-coast") )); } #[test] fn hyphen_in_bare_identifier_inside_aggregate() { // Same fix, but inside a SUM() to exercise the aggregate path as well. let f = parse_formula("X = SUM(east-coast)", "Cat").unwrap(); assert!(matches!( &f.expr, Expr::Agg(AggFunc::Sum, inner, None) if matches!(**inner, Expr::Ref(ref s) if s == "east-coast") )); } #[test] fn pipe_quoted_target_preserves_pipes() { let f = parse_formula("|My Target| = A", "Cat").unwrap(); assert_eq!(f.target, "|My Target|"); } } // ---- grammar-driven proptests ------------------------------------------ // // Uses pest_meta to read formula.pest at test time and walks the grammar // AST to generate random valid formulas. Mirrors the pattern used by the // persistence parser tests in src/persistence/mod.rs. #[cfg(test)] mod generator { use pest_meta::ast::{Expr, RuleType}; use pest_meta::parser; use proptest::prelude::*; use std::collections::HashMap; /// Parse formula.pest and return rules keyed by name. fn load_grammar() -> HashMap { let grammar = include_str!("formula.pest"); let pairs = parser::parse(parser::Rule::grammar_rules, grammar) .unwrap_or_else(|e| panic!("Bad grammar: {e}")); let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}")); rules .into_iter() .map(|r| (r.name.clone(), (r.ty, r.expr))) .collect() } /// Recursive string generator driven by a pest `Expr`. `choices` is /// consumed left-to-right at every decision point; the `atomic` flag /// controls whether sequences insert a space between their children, /// so that non-atomic grammar rules produce whitespace-separated /// tokens that pest's implicit WHITESPACE handling accepts. pub struct Gen<'g> { rules: &'g HashMap, choices: Vec, pos: usize, } impl<'g> Gen<'g> { pub fn new(rules: &'g HashMap, choices: Vec) -> Self { Self { rules, choices, pos: 0, } } fn pick(&mut self) -> u8 { let v = self.choices.get(self.pos).copied().unwrap_or(0); self.pos += 1; v } fn emit(&mut self, expr: &Expr, out: &mut String, atomic: bool) { match expr { Expr::Str(s) => out.push_str(s), Expr::Insens(s) => out.push_str(s), Expr::Ident(name) => self.emit_ident(name, out, atomic), Expr::Seq(a, b) => { self.emit(a, out, atomic); if !atomic { out.push(' '); } self.emit(b, out, atomic); } Expr::Choice(a, b) => { // 50/50 between the two branches of each Choice. // Nested Choices (used by rules with 3+ alternatives) // recurse into themselves, so deeper branches are // chosen less often than shallower ones — good // enough for the random-generation use case. if self.pick().is_multiple_of(2) { self.emit(a, out, atomic); } else { self.emit(b, out, atomic); } } Expr::Opt(inner) => { // ~66% chance of emitting the optional branch. if !self.pick().is_multiple_of(3) { self.emit(inner, out, atomic); } } Expr::Rep(inner) => { let count = self.pick() % 4; for i in 0..count { if i > 0 && !atomic { out.push(' '); } self.emit(inner, out, atomic); } } Expr::RepOnce(inner) => { // formula.pest only uses `+` inside atomic rules // (ASCII_DIGIT+), so inner repetitions never need // whitespace separation. let count = 1 + self.pick() % 3; for _ in 0..count { self.emit(inner, out, atomic); } } // Lookaheads (NegPred, PosPred) don't emit output. Any // other Expr variant (Range, RepExact, Push, PeekSlice, // Skip, …) is unused by formula.pest and silently // produces nothing; if the grammar starts using one, // generated formulas will fail to parse and we'll know. _ => {} } } fn emit_ident(&mut self, name: &str, out: &mut String, atomic: bool) { match name { "ANY" | "ASCII_ALPHA" => { out.push((b'a' + self.pick() % 26) as char); } "ASCII_ALPHANUMERIC" => { if self.pick().is_multiple_of(2) { out.push((b'a' + self.pick() % 26) as char); } else { out.push((b'0' + self.pick() % 10) as char); } } "ASCII_DIGIT" => { out.push((b'0' + self.pick() % 10) as char); } "NEWLINE" => out.push('\n'), "SOI" | "EOI" => {} _ => { // Every Ident in formula.pest refers to a rule that // exists in the grammar file — if it doesn't, the // grammar itself failed to compile and we wouldn't be // running tests, so `expect` here is a bug marker. let (ty, inner) = self .rules .get(name) .cloned() .expect("rule referenced by grammar exists"); let inner_atomic = atomic || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); self.emit(&inner, out, inner_atomic); } } } pub fn generate(&mut self, rule_name: &str) -> String { let mut out = String::new(); let (ty, expr) = self .rules .get(rule_name) .cloned() .expect("entry rule exists in formula.pest"); let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); self.emit(&expr, &mut out, atomic); out } } /// Proptest strategy: generate a random valid formula by walking the /// grammar's `formula` rule. pub fn formula_string() -> impl Strategy { prop::collection::vec(any::(), 32..=128).prop_map(|choices| { let rules = load_grammar(); Gen::new(&rules, choices).generate("formula") }) } /// Proptest strategy: generate a random valid standalone expression. pub fn expr_string() -> impl Strategy { prop::collection::vec(any::(), 32..=128).prop_map(|choices| { let rules = load_grammar(); Gen::new(&rules, choices).generate("expr_eoi") }) } } #[cfg(test)] mod grammar_prop_tests { use super::generator; use super::{parse_expr, parse_formula}; use proptest::prelude::*; proptest! { #![proptest_config(ProptestConfig::with_cases(256))] /// Every generator-produced formula parses without error. #[test] fn generated_formula_parses(formula in generator::formula_string()) { let result = parse_formula(&formula, "Cat"); prop_assert!( result.is_ok(), "Generated formula failed to parse:\n{}\nError: {}", formula, result.unwrap_err() ); } /// Every generator-produced standalone expression parses. #[test] fn generated_expr_parses(expr in generator::expr_string()) { let result = parse_expr(&expr); prop_assert!( result.is_ok(), "Generated expression failed to parse:\n{}\nError: {}", expr, result.unwrap_err() ); } /// Parsing the same input twice is deterministic — the debug /// representation of the resulting AST is identical. #[test] fn parse_is_deterministic(formula in generator::formula_string()) { let r1 = parse_formula(&formula, "Cat"); let r2 = parse_formula(&formula, "Cat"); prop_assume!(r1.is_ok() && r2.is_ok()); let f1 = r1.unwrap(); let f2 = r2.unwrap(); prop_assert_eq!(&f1.target, &f2.target); prop_assert_eq!(format!("{:?}", f1.expr), format!("{:?}", f2.expr)); } } }