diff --git a/crates/improvise-formula/src/parser.rs b/crates/improvise-formula/src/parser.rs index 51cb101..b76506f 100644 --- a/crates/improvise-formula/src/parser.rs +++ b/crates/improvise-formula/src/parser.rs @@ -385,13 +385,13 @@ mod tests { #[test] fn parse_sum_with_inline_where_filter() { let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap(); - if let Expr::Agg(AggFunc::Sum, inner, Some(filter)) = &f.expr { - assert!(matches!(**inner, Expr::Ref(_))); - assert_eq!(filter.category, "Region"); - assert_eq!(filter.item, "East"); - } else { - panic!("Expected SUM with inline WHERE filter, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::Agg(AggFunc::Sum, inner, Some(filter)) + if matches!(**inner, Expr::Ref(_)) + && filter.category == "Region" + && filter.item == "East" + )); } // ── Comparison operators ──────────────────────────────────────────── @@ -467,21 +467,20 @@ mod tests { fn parse_aggregate_name_without_parens_is_ref() { // "SUM" without parens should be treated as a reference, not a function let f = parse_formula("X = SUM + 1", "Cat").unwrap(); - assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); - if let Expr::BinOp(_, lhs, _) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(_))); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, _) if matches!(**lhs, Expr::Ref(_)) + )); } #[test] fn parse_if_without_parens_is_ref() { // "IF" without parens should be treated as a reference let f = parse_formula("X = IF + 1", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Add, lhs, _) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(_))); - } else { - panic!("Expected BinOp(Add), got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, _) if matches!(**lhs, Expr::Ref(_)) + )); } // ── Quoted string in WHERE ────────────────────────────────────────── @@ -540,46 +539,46 @@ mod tests { fn pipe_quoted_identifier_in_expression() { let f = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Foo").unwrap(); assert_eq!(f.target, "|Total Revenue|"); - if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(ref s) if s == "Base Revenue")); - assert!(matches!(**rhs, Expr::Ref(ref s) if s == "Bonus")); - } else { - panic!("Expected Add, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, rhs) + if matches!(**lhs, Expr::Ref(ref s) if s == "Base Revenue") + && matches!(**rhs, Expr::Ref(ref s) if s == "Bonus") + )); } #[test] fn pipe_quoted_keyword_as_identifier() { // A category named "WHERE" can be referenced with pipes let f = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(ref s) if s == "WHERE")); - assert!(matches!(**rhs, Expr::Ref(ref s) if s == "SUM")); - } else { - panic!("Expected Add, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, rhs) + if matches!(**lhs, Expr::Ref(ref s) if s == "WHERE") + && matches!(**rhs, Expr::Ref(ref s) if s == "SUM") + )); } #[test] fn pipe_quoted_identifier_with_special_chars() { // Pipes allow characters that would normally break tokenization let f = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(ref s) if s == "Revenue (USD)")); - assert!(matches!(**rhs, Expr::Ref(ref s) if s == "Cost + Tax")); - } else { - panic!("Expected Add, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, rhs) + if matches!(**lhs, Expr::Ref(ref s) if s == "Revenue (USD)") + && matches!(**rhs, Expr::Ref(ref s) if s == "Cost + Tax") + )); } #[test] fn pipe_quoted_in_aggregate() { let f = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap(); - if let Expr::Agg(AggFunc::Sum, inner, None) = &f.expr { - assert!(matches!(**inner, Expr::Ref(ref s) if s == "Net Revenue")); - } else { - panic!("Expected SUM aggregate, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::Agg(AggFunc::Sum, inner, None) + if matches!(**inner, Expr::Ref(ref s) if s == "Net Revenue") + )); } #[test] @@ -593,12 +592,11 @@ mod tests { fn pipe_quoted_in_inline_where() { let f = parse_formula("X = SUM(Revenue WHERE |Region Name| = |East Coast|)", "Foo").unwrap(); - if let Expr::Agg(AggFunc::Sum, _, Some(filter)) = &f.expr { - assert_eq!(filter.category, "Region Name"); - assert_eq!(filter.item, "East Coast"); - } else { - panic!("Expected SUM with WHERE filter, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::Agg(AggFunc::Sum, _, Some(filter)) + if filter.category == "Region Name" && filter.item == "East Coast" + )); } // ── Pipe-quoted escape semantics ──────────────────────────────────── @@ -607,33 +605,21 @@ mod tests { fn pipe_quoted_escape_literal_pipe() { // \| inside a pipe-quoted identifier is a literal pipe let f = parse_formula("X = |A\\|B|", "Cat").unwrap(); - if let Expr::Ref(ref s) = f.expr { - assert_eq!(s, "A|B"); - } else { - panic!("Expected Ref, got: {:?}", f.expr); - } + assert!(matches!(&f.expr, Expr::Ref(s) if s == "A|B")); } #[test] fn pipe_quoted_escape_double_backslash() { // \\ inside a pipe-quoted identifier is a literal backslash let f = parse_formula("X = |A\\\\B|", "Cat").unwrap(); - if let Expr::Ref(ref s) = f.expr { - assert_eq!(s, "A\\B"); - } else { - panic!("Expected Ref, got: {:?}", f.expr); - } + assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\\B")); } #[test] fn pipe_quoted_escape_newline() { // \n inside a pipe-quoted identifier is a literal newline let f = parse_formula("X = |A\\nB|", "Cat").unwrap(); - if let Expr::Ref(ref s) = f.expr { - assert_eq!(s, "A\nB"); - } else { - panic!("Expected Ref, got: {:?}", f.expr); - } + assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\nB")); } #[test] @@ -642,11 +628,7 @@ mod tests { // backslash-plus-character. The grammar's `"\\" ~ ANY` allows // any following character; we just don't interpret it. let f = parse_formula("X = |A\\zB|", "Cat").unwrap(); - if let Expr::Ref(ref s) = f.expr { - assert_eq!(s, "A\\zB"); - } else { - panic!("Expected Ref, got: {:?}", f.expr); - } + assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\\zB")); } // ── Operator precedence and associativity ────────────────────────── @@ -655,66 +637,62 @@ mod tests { fn mul_binds_tighter_than_add() { // `A + B * C` must parse as Add(A, Mul(B, C)) let f = parse_formula("X = A + B * C", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(_))); - assert!(matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _))); - } else { - panic!("Expected Add with Mul on rhs, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, rhs) + if matches!(**lhs, Expr::Ref(_)) + && matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _)) + )); } #[test] fn pow_binds_tighter_than_mul() { // `A * B ^ C` must parse as Mul(A, Pow(B, C)) let f = parse_formula("X = A * B ^ C", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Mul, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::Ref(_))); - assert!(matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _))); - } else { - panic!("Expected Mul with Pow on rhs, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Mul, lhs, rhs) + if matches!(**lhs, Expr::Ref(_)) + && matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _)) + )); } #[test] fn subtraction_is_left_associative() { // `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C)) let f = parse_formula("X = A - B - C", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Sub, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _))); - assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); - } else { - panic!("Expected left-associative Sub, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Sub, lhs, rhs) + if matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _)) + && matches!(**rhs, Expr::Ref(ref s) if s == "C") + )); } #[test] fn division_is_left_associative() { // `A / B / C` must parse as Div(Div(A, B), C) let f = parse_formula("X = A / B / C", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Div, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::BinOp(BinOp::Div, _, _))); - assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); - } else { - panic!("Expected left-associative Div, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Div, lhs, rhs) + if matches!(**lhs, Expr::BinOp(BinOp::Div, _, _)) + && matches!(**rhs, Expr::Ref(ref s) if s == "C") + )); } #[test] fn unary_minus_before_pow() { - // `-A ^ B` — hand-rolled parser parses this as UnaryMinus(Ref(A)) - // followed by nothing because pow is inside unary's recursion. - // My grammar: unary = unary_minus | primary, unary_minus = "-" ~ primary. - // So `-A` is UnaryMinus(Ref(A)); `^B` can't attach because pow_expr - // sits OUTSIDE unary. Result: pow_expr(unary_minus(A)) — no pow - // because the optional pow_op doesn't have another unary to bind to. - // Verify: `-A ^ B` parses to BinOp(Pow, UnaryMinus(A), B). + // `-A ^ B` must parse as Pow(UnaryMinus(A), B). The `-` binds + // through `unary_minus = { "-" ~ primary }`, producing a unary + // node that is then used as the pow_expr's base. let f = parse_formula("X = -A ^ B", "Cat").unwrap(); - if let Expr::BinOp(BinOp::Pow, lhs, rhs) = &f.expr { - assert!(matches!(**lhs, Expr::UnaryMinus(_))); - assert!(matches!(**rhs, Expr::Ref(ref s) if s == "B")); - } else { - panic!("Expected Pow(UnaryMinus, Ref), got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Pow, lhs, rhs) + if matches!(**lhs, Expr::UnaryMinus(_)) + && matches!(**rhs, Expr::Ref(ref s) if s == "B") + )); } // ── Number literal variants ──────────────────────────────────────── @@ -767,11 +745,11 @@ mod tests { fn nested_sum_aggregate() { // Nested aggregates — outer SUM wrapping an inner SUM. let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap(); - if let Expr::Agg(AggFunc::Sum, outer_inner, None) = &f.expr { - assert!(matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None))); - } else { - panic!("Expected nested SUM aggregates, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::Agg(AggFunc::Sum, outer_inner, None) + if matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None)) + )); } #[test] @@ -785,12 +763,12 @@ mod tests { fn nested_if_expression() { // IF in the then-branch of another IF. let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap(); - if let Expr::If(_, then_e, else_e) = &f.expr { - assert!(matches!(**then_e, Expr::If(_, _, _))); - assert!(matches!(**else_e, Expr::Number(n) if n == 3.0)); - } else { - panic!("Expected outer IF, got: {:?}", f.expr); - } + assert!(matches!( + &f.expr, + Expr::If(_, then_e, else_e) + if matches!(**then_e, Expr::If(_, _, _)) + && matches!(**else_e, Expr::Number(n) if n == 3.0) + )); } // ── Whitespace tolerance ─────────────────────────────────────────── @@ -845,6 +823,34 @@ mod tests { assert_eq!(f.target, "my_target-name"); } + /// Regression: the hand-rolled tokenizer didn't allow `-` inside bare + /// identifiers, so references to persistence-legal names like + /// `east-coast` were silently parsed as `east - coast` (a subtraction + /// of two unrelated Refs). The pest grammar mirrors improv.pest's + /// `bare_name`, which does allow `-`, so a hyphenated name now parses + /// as a single identifier. + #[test] + fn hyphen_in_bare_identifier_reference() { + let f = parse_formula("Total = east-coast + west-coast", "Cat").unwrap(); + assert!(matches!( + &f.expr, + Expr::BinOp(BinOp::Add, lhs, rhs) + if matches!(**lhs, Expr::Ref(ref s) if s == "east-coast") + && matches!(**rhs, Expr::Ref(ref s) if s == "west-coast") + )); + } + + #[test] + fn hyphen_in_bare_identifier_inside_aggregate() { + // Same fix, but inside a SUM() to exercise the aggregate path as well. + let f = parse_formula("X = SUM(east-coast)", "Cat").unwrap(); + assert!(matches!( + &f.expr, + Expr::Agg(AggFunc::Sum, inner, None) + if matches!(**inner, Expr::Ref(ref s) if s == "east-coast") + )); + } + #[test] fn pipe_quoted_target_preserves_pipes() { let f = parse_formula("|My Target| = A", "Cat").unwrap(); @@ -907,13 +913,6 @@ mod generator { match expr { Expr::Str(s) => out.push_str(s), Expr::Insens(s) => out.push_str(s), - Expr::Range(lo, hi) => { - let lo = lo.chars().next().unwrap() as u32; - let hi = hi.chars().next().unwrap() as u32; - let range = hi - lo + 1; - let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap(); - out.push(ch); - } Expr::Ident(name) => self.emit_ident(name, out, atomic), Expr::Seq(a, b) => { self.emit(a, out, atomic); @@ -923,18 +922,19 @@ mod generator { self.emit(b, out, atomic); } Expr::Choice(a, b) => { - let mut alts: Vec<&Expr> = vec![a.as_ref()]; - let mut cur = b.as_ref(); - while let Expr::Choice(l, r) = cur { - alts.push(l.as_ref()); - cur = r.as_ref(); + // 50/50 between the two branches of each Choice. + // Nested Choices (used by rules with 3+ alternatives) + // recurse into themselves, so deeper branches are + // chosen less often than shallower ones — good + // enough for the random-generation use case. + if self.pick().is_multiple_of(2) { + self.emit(a, out, atomic); + } else { + self.emit(b, out, atomic); } - alts.push(cur); - let idx = self.pick() as usize % alts.len(); - self.emit(alts[idx], out, atomic); } Expr::Opt(inner) => { - // ~66% chance of emitting + // ~66% chance of emitting the optional branch. if !self.pick().is_multiple_of(3) { self.emit(inner, out, atomic); } @@ -949,20 +949,20 @@ mod generator { } } Expr::RepOnce(inner) => { + // formula.pest only uses `+` inside atomic rules + // (ASCII_DIGIT+), so inner repetitions never need + // whitespace separation. let count = 1 + self.pick() % 3; - for i in 0..count { - if i > 0 && !atomic { - out.push(' '); - } + for _ in 0..count { self.emit(inner, out, atomic); } } - Expr::NegPred(_) | Expr::PosPred(_) => { - // Lookaheads don't emit output. - } - _ => { - // Skip unsupported expressions. - } + // Lookaheads (NegPred, PosPred) don't emit output. Any + // other Expr variant (Range, RepExact, Push, PeekSlice, + // Skip, …) is unused by formula.pest and silently + // produces nothing; if the grammar starts using one, + // generated formulas will fail to parse and we'll know. + _ => {} } } @@ -984,21 +984,31 @@ mod generator { "NEWLINE" => out.push('\n'), "SOI" | "EOI" => {} _ => { - if let Some((ty, inner)) = self.rules.get(name).cloned() { - let inner_atomic = atomic - || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); - self.emit(&inner, out, inner_atomic); - } + // Every Ident in formula.pest refers to a rule that + // exists in the grammar file — if it doesn't, the + // grammar itself failed to compile and we wouldn't be + // running tests, so `expect` here is a bug marker. + let (ty, inner) = self + .rules + .get(name) + .cloned() + .expect("rule referenced by grammar exists"); + let inner_atomic = + atomic || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); + self.emit(&inner, out, inner_atomic); } } } pub fn generate(&mut self, rule_name: &str) -> String { let mut out = String::new(); - if let Some((ty, expr)) = self.rules.get(rule_name).cloned() { - let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); - self.emit(&expr, &mut out, atomic); - } + let (ty, expr) = self + .rules + .get(rule_name) + .cloned() + .expect("entry rule exists in formula.pest"); + let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); + self.emit(&expr, &mut out, atomic); out } }