refactor(parser): simplify tests and generator logic

Refactor formula parser tests to use more concise assert!(matches!(...))
syntax. Simplify the formula generator implementation by removing unused
expression variants and using expect() for mandatory grammar rules. Add a
regression test for hyphenated identifiers in bare names.

Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf)
This commit is contained in:
Edward Langley
2026-04-15 21:32:34 -07:00
parent 6d4b19a940
commit 23c7c530e3

View File

@ -385,13 +385,13 @@ mod tests {
#[test] #[test]
fn parse_sum_with_inline_where_filter() { fn parse_sum_with_inline_where_filter() {
let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap(); let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap();
if let Expr::Agg(AggFunc::Sum, inner, Some(filter)) = &f.expr { assert!(matches!(
assert!(matches!(**inner, Expr::Ref(_))); &f.expr,
assert_eq!(filter.category, "Region"); Expr::Agg(AggFunc::Sum, inner, Some(filter))
assert_eq!(filter.item, "East"); if matches!(**inner, Expr::Ref(_))
} else { && filter.category == "Region"
panic!("Expected SUM with inline WHERE filter, got: {:?}", f.expr); && filter.item == "East"
} ));
} }
// ── Comparison operators ──────────────────────────────────────────── // ── Comparison operators ────────────────────────────────────────────
@ -467,21 +467,20 @@ mod tests {
fn parse_aggregate_name_without_parens_is_ref() { fn parse_aggregate_name_without_parens_is_ref() {
// "SUM" without parens should be treated as a reference, not a function // "SUM" without parens should be treated as a reference, not a function
let f = parse_formula("X = SUM + 1", "Cat").unwrap(); let f = parse_formula("X = SUM + 1", "Cat").unwrap();
assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); assert!(matches!(
if let Expr::BinOp(_, lhs, _) = &f.expr { &f.expr,
assert!(matches!(**lhs, Expr::Ref(_))); Expr::BinOp(BinOp::Add, lhs, _) if matches!(**lhs, Expr::Ref(_))
} ));
} }
#[test] #[test]
fn parse_if_without_parens_is_ref() { fn parse_if_without_parens_is_ref() {
// "IF" without parens should be treated as a reference // "IF" without parens should be treated as a reference
let f = parse_formula("X = IF + 1", "Cat").unwrap(); let f = parse_formula("X = IF + 1", "Cat").unwrap();
if let Expr::BinOp(BinOp::Add, lhs, _) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::Ref(_))); &f.expr,
} else { Expr::BinOp(BinOp::Add, lhs, _) if matches!(**lhs, Expr::Ref(_))
panic!("Expected BinOp(Add), got: {:?}", f.expr); ));
}
} }
// ── Quoted string in WHERE ────────────────────────────────────────── // ── Quoted string in WHERE ──────────────────────────────────────────
@ -540,46 +539,46 @@ mod tests {
fn pipe_quoted_identifier_in_expression() { fn pipe_quoted_identifier_in_expression() {
let f = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Foo").unwrap(); let f = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Foo").unwrap();
assert_eq!(f.target, "|Total Revenue|"); assert_eq!(f.target, "|Total Revenue|");
if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::Ref(ref s) if s == "Base Revenue")); &f.expr,
assert!(matches!(**rhs, Expr::Ref(ref s) if s == "Bonus")); Expr::BinOp(BinOp::Add, lhs, rhs)
} else { if matches!(**lhs, Expr::Ref(ref s) if s == "Base Revenue")
panic!("Expected Add, got: {:?}", f.expr); && matches!(**rhs, Expr::Ref(ref s) if s == "Bonus")
} ));
} }
#[test] #[test]
fn pipe_quoted_keyword_as_identifier() { fn pipe_quoted_keyword_as_identifier() {
// A category named "WHERE" can be referenced with pipes // A category named "WHERE" can be referenced with pipes
let f = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap(); let f = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap();
if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::Ref(ref s) if s == "WHERE")); &f.expr,
assert!(matches!(**rhs, Expr::Ref(ref s) if s == "SUM")); Expr::BinOp(BinOp::Add, lhs, rhs)
} else { if matches!(**lhs, Expr::Ref(ref s) if s == "WHERE")
panic!("Expected Add, got: {:?}", f.expr); && matches!(**rhs, Expr::Ref(ref s) if s == "SUM")
} ));
} }
#[test] #[test]
fn pipe_quoted_identifier_with_special_chars() { fn pipe_quoted_identifier_with_special_chars() {
// Pipes allow characters that would normally break tokenization // Pipes allow characters that would normally break tokenization
let f = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap(); let f = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap();
if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::Ref(ref s) if s == "Revenue (USD)")); &f.expr,
assert!(matches!(**rhs, Expr::Ref(ref s) if s == "Cost + Tax")); Expr::BinOp(BinOp::Add, lhs, rhs)
} else { if matches!(**lhs, Expr::Ref(ref s) if s == "Revenue (USD)")
panic!("Expected Add, got: {:?}", f.expr); && matches!(**rhs, Expr::Ref(ref s) if s == "Cost + Tax")
} ));
} }
#[test] #[test]
fn pipe_quoted_in_aggregate() { fn pipe_quoted_in_aggregate() {
let f = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap(); let f = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap();
if let Expr::Agg(AggFunc::Sum, inner, None) = &f.expr { assert!(matches!(
assert!(matches!(**inner, Expr::Ref(ref s) if s == "Net Revenue")); &f.expr,
} else { Expr::Agg(AggFunc::Sum, inner, None)
panic!("Expected SUM aggregate, got: {:?}", f.expr); if matches!(**inner, Expr::Ref(ref s) if s == "Net Revenue")
} ));
} }
#[test] #[test]
@ -593,12 +592,11 @@ mod tests {
fn pipe_quoted_in_inline_where() { fn pipe_quoted_in_inline_where() {
let f = let f =
parse_formula("X = SUM(Revenue WHERE |Region Name| = |East Coast|)", "Foo").unwrap(); parse_formula("X = SUM(Revenue WHERE |Region Name| = |East Coast|)", "Foo").unwrap();
if let Expr::Agg(AggFunc::Sum, _, Some(filter)) = &f.expr { assert!(matches!(
assert_eq!(filter.category, "Region Name"); &f.expr,
assert_eq!(filter.item, "East Coast"); Expr::Agg(AggFunc::Sum, _, Some(filter))
} else { if filter.category == "Region Name" && filter.item == "East Coast"
panic!("Expected SUM with WHERE filter, got: {:?}", f.expr); ));
}
} }
// ── Pipe-quoted escape semantics ──────────────────────────────────── // ── Pipe-quoted escape semantics ────────────────────────────────────
@ -607,33 +605,21 @@ mod tests {
fn pipe_quoted_escape_literal_pipe() { fn pipe_quoted_escape_literal_pipe() {
// \| inside a pipe-quoted identifier is a literal pipe // \| inside a pipe-quoted identifier is a literal pipe
let f = parse_formula("X = |A\\|B|", "Cat").unwrap(); let f = parse_formula("X = |A\\|B|", "Cat").unwrap();
if let Expr::Ref(ref s) = f.expr { assert!(matches!(&f.expr, Expr::Ref(s) if s == "A|B"));
assert_eq!(s, "A|B");
} else {
panic!("Expected Ref, got: {:?}", f.expr);
}
} }
#[test] #[test]
fn pipe_quoted_escape_double_backslash() { fn pipe_quoted_escape_double_backslash() {
// \\ inside a pipe-quoted identifier is a literal backslash // \\ inside a pipe-quoted identifier is a literal backslash
let f = parse_formula("X = |A\\\\B|", "Cat").unwrap(); let f = parse_formula("X = |A\\\\B|", "Cat").unwrap();
if let Expr::Ref(ref s) = f.expr { assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\\B"));
assert_eq!(s, "A\\B");
} else {
panic!("Expected Ref, got: {:?}", f.expr);
}
} }
#[test] #[test]
fn pipe_quoted_escape_newline() { fn pipe_quoted_escape_newline() {
// \n inside a pipe-quoted identifier is a literal newline // \n inside a pipe-quoted identifier is a literal newline
let f = parse_formula("X = |A\\nB|", "Cat").unwrap(); let f = parse_formula("X = |A\\nB|", "Cat").unwrap();
if let Expr::Ref(ref s) = f.expr { assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\nB"));
assert_eq!(s, "A\nB");
} else {
panic!("Expected Ref, got: {:?}", f.expr);
}
} }
#[test] #[test]
@ -642,11 +628,7 @@ mod tests {
// backslash-plus-character. The grammar's `"\\" ~ ANY` allows // backslash-plus-character. The grammar's `"\\" ~ ANY` allows
// any following character; we just don't interpret it. // any following character; we just don't interpret it.
let f = parse_formula("X = |A\\zB|", "Cat").unwrap(); let f = parse_formula("X = |A\\zB|", "Cat").unwrap();
if let Expr::Ref(ref s) = f.expr { assert!(matches!(&f.expr, Expr::Ref(s) if s == "A\\zB"));
assert_eq!(s, "A\\zB");
} else {
panic!("Expected Ref, got: {:?}", f.expr);
}
} }
// ── Operator precedence and associativity ────────────────────────── // ── Operator precedence and associativity ──────────────────────────
@ -655,66 +637,62 @@ mod tests {
fn mul_binds_tighter_than_add() { fn mul_binds_tighter_than_add() {
// `A + B * C` must parse as Add(A, Mul(B, C)) // `A + B * C` must parse as Add(A, Mul(B, C))
let f = parse_formula("X = A + B * C", "Cat").unwrap(); let f = parse_formula("X = A + B * C", "Cat").unwrap();
if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::Ref(_))); &f.expr,
assert!(matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _))); Expr::BinOp(BinOp::Add, lhs, rhs)
} else { if matches!(**lhs, Expr::Ref(_))
panic!("Expected Add with Mul on rhs, got: {:?}", f.expr); && matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _))
} ));
} }
#[test] #[test]
fn pow_binds_tighter_than_mul() { fn pow_binds_tighter_than_mul() {
// `A * B ^ C` must parse as Mul(A, Pow(B, C)) // `A * B ^ C` must parse as Mul(A, Pow(B, C))
let f = parse_formula("X = A * B ^ C", "Cat").unwrap(); let f = parse_formula("X = A * B ^ C", "Cat").unwrap();
if let Expr::BinOp(BinOp::Mul, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::Ref(_))); &f.expr,
assert!(matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _))); Expr::BinOp(BinOp::Mul, lhs, rhs)
} else { if matches!(**lhs, Expr::Ref(_))
panic!("Expected Mul with Pow on rhs, got: {:?}", f.expr); && matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _))
} ));
} }
#[test] #[test]
fn subtraction_is_left_associative() { fn subtraction_is_left_associative() {
// `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C)) // `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C))
let f = parse_formula("X = A - B - C", "Cat").unwrap(); let f = parse_formula("X = A - B - C", "Cat").unwrap();
if let Expr::BinOp(BinOp::Sub, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _))); &f.expr,
assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); Expr::BinOp(BinOp::Sub, lhs, rhs)
} else { if matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _))
panic!("Expected left-associative Sub, got: {:?}", f.expr); && matches!(**rhs, Expr::Ref(ref s) if s == "C")
} ));
} }
#[test] #[test]
fn division_is_left_associative() { fn division_is_left_associative() {
// `A / B / C` must parse as Div(Div(A, B), C) // `A / B / C` must parse as Div(Div(A, B), C)
let f = parse_formula("X = A / B / C", "Cat").unwrap(); let f = parse_formula("X = A / B / C", "Cat").unwrap();
if let Expr::BinOp(BinOp::Div, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::BinOp(BinOp::Div, _, _))); &f.expr,
assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); Expr::BinOp(BinOp::Div, lhs, rhs)
} else { if matches!(**lhs, Expr::BinOp(BinOp::Div, _, _))
panic!("Expected left-associative Div, got: {:?}", f.expr); && matches!(**rhs, Expr::Ref(ref s) if s == "C")
} ));
} }
#[test] #[test]
fn unary_minus_before_pow() { fn unary_minus_before_pow() {
// `-A ^ B` — hand-rolled parser parses this as UnaryMinus(Ref(A)) // `-A ^ B` must parse as Pow(UnaryMinus(A), B). The `-` binds
// followed by nothing because pow is inside unary's recursion. // through `unary_minus = { "-" ~ primary }`, producing a unary
// My grammar: unary = unary_minus | primary, unary_minus = "-" ~ primary. // node that is then used as the pow_expr's base.
// So `-A` is UnaryMinus(Ref(A)); `^B` can't attach because pow_expr
// sits OUTSIDE unary. Result: pow_expr(unary_minus(A)) — no pow
// because the optional pow_op doesn't have another unary to bind to.
// Verify: `-A ^ B` parses to BinOp(Pow, UnaryMinus(A), B).
let f = parse_formula("X = -A ^ B", "Cat").unwrap(); let f = parse_formula("X = -A ^ B", "Cat").unwrap();
if let Expr::BinOp(BinOp::Pow, lhs, rhs) = &f.expr { assert!(matches!(
assert!(matches!(**lhs, Expr::UnaryMinus(_))); &f.expr,
assert!(matches!(**rhs, Expr::Ref(ref s) if s == "B")); Expr::BinOp(BinOp::Pow, lhs, rhs)
} else { if matches!(**lhs, Expr::UnaryMinus(_))
panic!("Expected Pow(UnaryMinus, Ref), got: {:?}", f.expr); && matches!(**rhs, Expr::Ref(ref s) if s == "B")
} ));
} }
// ── Number literal variants ──────────────────────────────────────── // ── Number literal variants ────────────────────────────────────────
@ -767,11 +745,11 @@ mod tests {
fn nested_sum_aggregate() { fn nested_sum_aggregate() {
// Nested aggregates — outer SUM wrapping an inner SUM. // Nested aggregates — outer SUM wrapping an inner SUM.
let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap(); let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap();
if let Expr::Agg(AggFunc::Sum, outer_inner, None) = &f.expr { assert!(matches!(
assert!(matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None))); &f.expr,
} else { Expr::Agg(AggFunc::Sum, outer_inner, None)
panic!("Expected nested SUM aggregates, got: {:?}", f.expr); if matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None))
} ));
} }
#[test] #[test]
@ -785,12 +763,12 @@ mod tests {
fn nested_if_expression() { fn nested_if_expression() {
// IF in the then-branch of another IF. // IF in the then-branch of another IF.
let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap(); let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap();
if let Expr::If(_, then_e, else_e) = &f.expr { assert!(matches!(
assert!(matches!(**then_e, Expr::If(_, _, _))); &f.expr,
assert!(matches!(**else_e, Expr::Number(n) if n == 3.0)); Expr::If(_, then_e, else_e)
} else { if matches!(**then_e, Expr::If(_, _, _))
panic!("Expected outer IF, got: {:?}", f.expr); && matches!(**else_e, Expr::Number(n) if n == 3.0)
} ));
} }
// ── Whitespace tolerance ─────────────────────────────────────────── // ── Whitespace tolerance ───────────────────────────────────────────
@ -845,6 +823,34 @@ mod tests {
assert_eq!(f.target, "my_target-name"); assert_eq!(f.target, "my_target-name");
} }
/// Regression: the hand-rolled tokenizer didn't allow `-` inside bare
/// identifiers, so references to persistence-legal names like
/// `east-coast` were silently parsed as `east - coast` (a subtraction
/// of two unrelated Refs). The pest grammar mirrors improv.pest's
/// `bare_name`, which does allow `-`, so a hyphenated name now parses
/// as a single identifier.
#[test]
fn hyphen_in_bare_identifier_reference() {
let f = parse_formula("Total = east-coast + west-coast", "Cat").unwrap();
assert!(matches!(
&f.expr,
Expr::BinOp(BinOp::Add, lhs, rhs)
if matches!(**lhs, Expr::Ref(ref s) if s == "east-coast")
&& matches!(**rhs, Expr::Ref(ref s) if s == "west-coast")
));
}
#[test]
fn hyphen_in_bare_identifier_inside_aggregate() {
// Same fix, but inside a SUM() to exercise the aggregate path as well.
let f = parse_formula("X = SUM(east-coast)", "Cat").unwrap();
assert!(matches!(
&f.expr,
Expr::Agg(AggFunc::Sum, inner, None)
if matches!(**inner, Expr::Ref(ref s) if s == "east-coast")
));
}
#[test] #[test]
fn pipe_quoted_target_preserves_pipes() { fn pipe_quoted_target_preserves_pipes() {
let f = parse_formula("|My Target| = A", "Cat").unwrap(); let f = parse_formula("|My Target| = A", "Cat").unwrap();
@ -907,13 +913,6 @@ mod generator {
match expr { match expr {
Expr::Str(s) => out.push_str(s), Expr::Str(s) => out.push_str(s),
Expr::Insens(s) => out.push_str(s), Expr::Insens(s) => out.push_str(s),
Expr::Range(lo, hi) => {
let lo = lo.chars().next().unwrap() as u32;
let hi = hi.chars().next().unwrap() as u32;
let range = hi - lo + 1;
let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
out.push(ch);
}
Expr::Ident(name) => self.emit_ident(name, out, atomic), Expr::Ident(name) => self.emit_ident(name, out, atomic),
Expr::Seq(a, b) => { Expr::Seq(a, b) => {
self.emit(a, out, atomic); self.emit(a, out, atomic);
@ -923,18 +922,19 @@ mod generator {
self.emit(b, out, atomic); self.emit(b, out, atomic);
} }
Expr::Choice(a, b) => { Expr::Choice(a, b) => {
let mut alts: Vec<&Expr> = vec![a.as_ref()]; // 50/50 between the two branches of each Choice.
let mut cur = b.as_ref(); // Nested Choices (used by rules with 3+ alternatives)
while let Expr::Choice(l, r) = cur { // recurse into themselves, so deeper branches are
alts.push(l.as_ref()); // chosen less often than shallower ones — good
cur = r.as_ref(); // enough for the random-generation use case.
if self.pick().is_multiple_of(2) {
self.emit(a, out, atomic);
} else {
self.emit(b, out, atomic);
} }
alts.push(cur);
let idx = self.pick() as usize % alts.len();
self.emit(alts[idx], out, atomic);
} }
Expr::Opt(inner) => { Expr::Opt(inner) => {
// ~66% chance of emitting // ~66% chance of emitting the optional branch.
if !self.pick().is_multiple_of(3) { if !self.pick().is_multiple_of(3) {
self.emit(inner, out, atomic); self.emit(inner, out, atomic);
} }
@ -949,20 +949,20 @@ mod generator {
} }
} }
Expr::RepOnce(inner) => { Expr::RepOnce(inner) => {
// formula.pest only uses `+` inside atomic rules
// (ASCII_DIGIT+), so inner repetitions never need
// whitespace separation.
let count = 1 + self.pick() % 3; let count = 1 + self.pick() % 3;
for i in 0..count { for _ in 0..count {
if i > 0 && !atomic {
out.push(' ');
}
self.emit(inner, out, atomic); self.emit(inner, out, atomic);
} }
} }
Expr::NegPred(_) | Expr::PosPred(_) => { // Lookaheads (NegPred, PosPred) don't emit output. Any
// Lookaheads don't emit output. // other Expr variant (Range, RepExact, Push, PeekSlice,
} // Skip, …) is unused by formula.pest and silently
_ => { // produces nothing; if the grammar starts using one,
// Skip unsupported expressions. // generated formulas will fail to parse and we'll know.
} _ => {}
} }
} }
@ -984,21 +984,31 @@ mod generator {
"NEWLINE" => out.push('\n'), "NEWLINE" => out.push('\n'),
"SOI" | "EOI" => {} "SOI" | "EOI" => {}
_ => { _ => {
if let Some((ty, inner)) = self.rules.get(name).cloned() { // Every Ident in formula.pest refers to a rule that
let inner_atomic = atomic // exists in the grammar file — if it doesn't, the
|| matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); // grammar itself failed to compile and we wouldn't be
self.emit(&inner, out, inner_atomic); // running tests, so `expect` here is a bug marker.
} let (ty, inner) = self
.rules
.get(name)
.cloned()
.expect("rule referenced by grammar exists");
let inner_atomic =
atomic || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic);
self.emit(&inner, out, inner_atomic);
} }
} }
} }
pub fn generate(&mut self, rule_name: &str) -> String { pub fn generate(&mut self, rule_name: &str) -> String {
let mut out = String::new(); let mut out = String::new();
if let Some((ty, expr)) = self.rules.get(rule_name).cloned() { let (ty, expr) = self
let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); .rules
self.emit(&expr, &mut out, atomic); .get(rule_name)
} .cloned()
.expect("entry rule exists in formula.pest");
let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic);
self.emit(&expr, &mut out, atomic);
out out
} }
} }