refactor!(formula): migrate parser to use pest
Replace the manual tokenizer and recursive descent parser with a PEG grammar using the pest library. This migration involves introducing a formal grammar in formula.pest and updating the parser implementation to utilize the generated Pest parser with a tree-walking approach to construct the AST. The change introduces a stricter requirement for identifiers: multi-word identifiers must now be enclosed in pipe quotes (e.g., |Total Revenue|) and are no longer accepted as bare words. Tests have been updated to reflect the new parsing logic, remove tokenizer-specific tests, and verify the new pipe-quoting and escape semantics. BREAKING CHANGE: Multi-word identifiers now require pipe-quoting (e.g. |Total Revenue|) and are no longer accepted as bare words. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf)
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -756,6 +756,8 @@ name = "improvise-formula"
|
|||||||
version = "0.1.0-rc2"
|
version = "0.1.0-rc2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"pest",
|
||||||
|
"pest_derive",
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@ -8,4 +8,6 @@ repository = "https://github.com/fiddlerwoaroof/improvise"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1"
|
anyhow = "1"
|
||||||
|
pest = "2.8.6"
|
||||||
|
pest_derive = "2.8.6"
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
|||||||
91
crates/improvise-formula/src/formula.pest
Normal file
91
crates/improvise-formula/src/formula.pest
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
// Formula grammar for improvise.
|
||||||
|
//
|
||||||
|
// A formula has the form: TARGET = EXPR [WHERE filter]
|
||||||
|
// See parser.rs for the tree walker that produces a Formula AST.
|
||||||
|
//
|
||||||
|
// Identifier rules (bare_ident / pipe_quoted) mirror `bare_name` and
|
||||||
|
// `pipe_quoted` in src/persistence/improv.pest: bare identifiers are
|
||||||
|
// alphanumeric plus `_` and `-`, with no internal spaces; multi-word
|
||||||
|
// names must be pipe-quoted.
|
||||||
|
|
||||||
|
// Auto-skip horizontal whitespace between tokens in non-atomic rules.
|
||||||
|
WHITESPACE = _{ " " | "\t" }
|
||||||
|
|
||||||
|
// ---- top-level ----------------------------------------------------------
|
||||||
|
|
||||||
|
formula = { SOI ~ target ~ "=" ~ expr ~ where_clause? ~ EOI }
|
||||||
|
|
||||||
|
// The target keeps its raw text (including pipes, if any) — we capture
|
||||||
|
// the span directly rather than walking into its children.
|
||||||
|
target = { identifier }
|
||||||
|
|
||||||
|
where_clause = { ^"WHERE" ~ identifier ~ "=" ~ filter_value }
|
||||||
|
|
||||||
|
// ---- expressions --------------------------------------------------------
|
||||||
|
|
||||||
|
// Used by parse_expr() — forces a standalone expression to consume the
|
||||||
|
// whole input, so `1 + 2 3` fails instead of silently dropping " 3".
|
||||||
|
expr_eoi = { SOI ~ expr ~ EOI }
|
||||||
|
|
||||||
|
expr = { add_expr }
|
||||||
|
|
||||||
|
add_expr = { mul_expr ~ (add_op ~ mul_expr)* }
|
||||||
|
add_op = { "+" | "-" }
|
||||||
|
|
||||||
|
mul_expr = { pow_expr ~ (mul_op ~ pow_expr)* }
|
||||||
|
mul_op = { "*" | "/" }
|
||||||
|
|
||||||
|
pow_expr = { unary ~ (pow_op ~ unary)? }
|
||||||
|
pow_op = { "^" }
|
||||||
|
|
||||||
|
unary = { unary_minus | primary }
|
||||||
|
unary_minus = { "-" ~ primary }
|
||||||
|
|
||||||
|
primary = {
|
||||||
|
number
|
||||||
|
| agg_call
|
||||||
|
| if_expr
|
||||||
|
| paren_expr
|
||||||
|
| ref_expr
|
||||||
|
}
|
||||||
|
|
||||||
|
paren_expr = { "(" ~ expr ~ ")" }
|
||||||
|
|
||||||
|
// Aggregates with optional inline WHERE filter inside the parens.
|
||||||
|
agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" }
|
||||||
|
agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" }
|
||||||
|
inline_where = { ^"WHERE" ~ identifier ~ "=" ~ filter_value }
|
||||||
|
|
||||||
|
// IF(cond, then, else). Comparison is a standalone rule because comparison
|
||||||
|
// operators are not valid in general expressions — only inside an IF condition.
|
||||||
|
if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" }
|
||||||
|
comparison = { expr ~ cmp_op ~ expr }
|
||||||
|
cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" }
|
||||||
|
|
||||||
|
// A reference to an item. `SUM` and `IF` without parens fall through to
|
||||||
|
// this rule because agg_call / if_expr require a "(" and otherwise fail.
|
||||||
|
ref_expr = { identifier }
|
||||||
|
|
||||||
|
// ---- identifiers --------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Mirror of improv.pest's bare_name / pipe_quoted.
|
||||||
|
|
||||||
|
identifier = ${ pipe_quoted | bare_ident }
|
||||||
|
|
||||||
|
// Backslash escapes inside pipes: \| literal pipe, \\ backslash, \n newline.
|
||||||
|
pipe_quoted = @{ "|" ~ ("\\" ~ ANY | !"|" ~ ANY)* ~ "|" }
|
||||||
|
|
||||||
|
bare_ident = @{
|
||||||
|
(ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-")*
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- literal values -----------------------------------------------------
|
||||||
|
|
||||||
|
filter_value = { string | pipe_quoted | bare_ident }
|
||||||
|
|
||||||
|
string = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" }
|
||||||
|
|
||||||
|
number = @{
|
||||||
|
ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT*)?
|
||||||
|
| "." ~ ASCII_DIGIT+
|
||||||
|
}
|
||||||
@ -1,462 +1,321 @@
|
|||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
|
use pest::Parser as _;
|
||||||
|
use pest::iterators::Pair;
|
||||||
|
use pest_derive::Parser;
|
||||||
|
|
||||||
use super::ast::{AggFunc, BinOp, Expr, Filter, Formula};
|
use super::ast::{AggFunc, BinOp, Expr, Filter, Formula};
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[grammar = "formula.pest"]
|
||||||
|
struct FormulaParser;
|
||||||
|
|
||||||
/// Parse a formula string like "Profit = Revenue - Cost"
|
/// Parse a formula string like "Profit = Revenue - Cost"
|
||||||
/// or "Tax = Revenue * 0.08 WHERE Region = \"East\""
|
/// or "Tax = Revenue * 0.08 WHERE Region = \"East\""
|
||||||
pub fn parse_formula(raw: &str, target_category: &str) -> Result<Formula> {
|
pub fn parse_formula(raw: &str, target_category: &str) -> Result<Formula> {
|
||||||
let raw = raw.trim();
|
let input = raw.trim();
|
||||||
|
let formula_pair = FormulaParser::parse(Rule::formula, input)
|
||||||
|
.map_err(|e| anyhow!("{}", e))?
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("empty parse result"))?;
|
||||||
|
build_formula(formula_pair, input, target_category)
|
||||||
|
}
|
||||||
|
|
||||||
// Split on first `=` to get target = expression
|
/// Parse a bare expression (no target, no top-level WHERE clause).
|
||||||
let eq_pos = raw
|
/// Fails if the input contains trailing tokens after a complete expression.
|
||||||
.find('=')
|
pub fn parse_expr(s: &str) -> Result<Expr> {
|
||||||
.ok_or_else(|| anyhow!("Formula must contain '=': {raw}"))?;
|
let input = s.trim();
|
||||||
let target = raw[..eq_pos].trim().to_string();
|
let expr_pair = FormulaParser::parse(Rule::expr_eoi, input)
|
||||||
let rest = raw[eq_pos + 1..].trim();
|
.map_err(|e| anyhow!("{}", e))?
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("empty parse result"))?
|
||||||
|
.into_inner()
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing expression in expr_eoi"))?;
|
||||||
|
build_expr(expr_pair)
|
||||||
|
}
|
||||||
|
|
||||||
// Check for WHERE clause at top level
|
// ---- tree walkers -------------------------------------------------------
|
||||||
let (expr_str, filter) = split_where(rest);
|
|
||||||
let filter = filter.map(parse_where).transpose()?;
|
|
||||||
|
|
||||||
let expr = parse_expr(expr_str.trim())?;
|
|
||||||
|
|
||||||
|
fn build_formula(pair: Pair<Rule>, raw: &str, target_category: &str) -> Result<Formula> {
|
||||||
|
let mut target = None;
|
||||||
|
let mut expr = None;
|
||||||
|
let mut filter = None;
|
||||||
|
for inner in pair.into_inner() {
|
||||||
|
match inner.as_rule() {
|
||||||
|
Rule::target => target = Some(inner.as_str().trim().to_string()),
|
||||||
|
Rule::expr => expr = Some(build_expr(inner)?),
|
||||||
|
Rule::where_clause => filter = Some(build_filter(inner)?),
|
||||||
|
Rule::EOI => {}
|
||||||
|
r => return Err(anyhow!("unexpected rule in formula: {:?}", r)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let target = target.ok_or_else(|| anyhow!("missing target in formula"))?;
|
||||||
|
let expr = expr.ok_or_else(|| anyhow!("missing expression in formula"))?;
|
||||||
Ok(Formula::new(raw, target, target_category, expr, filter))
|
Ok(Formula::new(raw, target, target_category, expr, filter))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn split_where(s: &str) -> (&str, Option<&str>) {
|
fn build_expr(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
// Find WHERE not inside parens or quotes
|
// expr = { add_expr }
|
||||||
let bytes = s.as_bytes();
|
build_add_expr(first_inner(pair, "expr")?)
|
||||||
let mut depth = 0i32;
|
|
||||||
let mut i = 0;
|
|
||||||
while i < bytes.len() {
|
|
||||||
match bytes[i] {
|
|
||||||
b'(' => depth += 1,
|
|
||||||
b')' => depth -= 1,
|
|
||||||
b'"' => {
|
|
||||||
i += 1;
|
|
||||||
while i < bytes.len() && bytes[i] != b'"' {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b'|' => {
|
|
||||||
i += 1;
|
|
||||||
while i < bytes.len() && bytes[i] != b'|' {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ if depth == 0 => {
|
|
||||||
if s[i..].to_ascii_uppercase().starts_with("WHERE") {
|
|
||||||
let before = &s[..i];
|
|
||||||
let after = &s[i + 5..];
|
|
||||||
if before.ends_with(char::is_whitespace) || i == 0 {
|
|
||||||
return (before.trim(), Some(after.trim()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
(s, None)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Strip pipe or double-quote delimiters from a value.
|
fn build_add_expr(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
fn unquote(s: &str) -> String {
|
fold_left_binop(pair, build_mul_expr, |s| match s {
|
||||||
let s = s.trim();
|
"+" => Some(BinOp::Add),
|
||||||
if (s.starts_with('"') && s.ends_with('"')) || (s.starts_with('|') && s.ends_with('|')) {
|
"-" => Some(BinOp::Sub),
|
||||||
s[1..s.len() - 1].to_string()
|
_ => None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_mul_expr(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
fold_left_binop(pair, build_pow_expr, |s| match s {
|
||||||
|
"*" => Some(BinOp::Mul),
|
||||||
|
"/" => Some(BinOp::Div),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_pow_expr(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
// pow_expr = { unary ~ (pow_op ~ unary)? }
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
let base_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("empty pow_expr"))?;
|
||||||
|
let base = build_unary(base_pair)?;
|
||||||
|
match pairs.next() {
|
||||||
|
None => Ok(base),
|
||||||
|
Some(op_pair) => {
|
||||||
|
debug_assert_eq!(op_pair.as_rule(), Rule::pow_op);
|
||||||
|
let exp_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing exponent in pow_expr"))?;
|
||||||
|
let exp = build_unary(exp_pair)?;
|
||||||
|
Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_unary(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
// unary = { unary_minus | primary }
|
||||||
|
let inner = first_inner(pair, "unary")?;
|
||||||
|
match inner.as_rule() {
|
||||||
|
Rule::unary_minus => {
|
||||||
|
let prim = first_inner(inner, "unary_minus")?;
|
||||||
|
Ok(Expr::UnaryMinus(Box::new(build_primary(prim)?)))
|
||||||
|
}
|
||||||
|
Rule::primary => build_primary(inner),
|
||||||
|
r => Err(anyhow!("unexpected rule in unary: {:?}", r)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_primary(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
// primary = { number | agg_call | if_expr | paren_expr | ref_expr }
|
||||||
|
let inner = first_inner(pair, "primary")?;
|
||||||
|
match inner.as_rule() {
|
||||||
|
Rule::number => Ok(Expr::Number(inner.as_str().parse()?)),
|
||||||
|
Rule::agg_call => build_agg_call(inner),
|
||||||
|
Rule::if_expr => build_if_expr(inner),
|
||||||
|
Rule::paren_expr => build_expr(first_inner(inner, "paren_expr")?),
|
||||||
|
Rule::ref_expr => {
|
||||||
|
let id_pair = first_inner(inner, "ref_expr")?;
|
||||||
|
Ok(Expr::Ref(identifier_to_string(id_pair)))
|
||||||
|
}
|
||||||
|
r => Err(anyhow!("unexpected rule in primary: {:?}", r)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_agg_call(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
// agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" }
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
let func_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing agg_func"))?;
|
||||||
|
let func = parse_agg_func(func_pair.as_str())?;
|
||||||
|
let expr_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing aggregate argument"))?;
|
||||||
|
let inner_expr = build_expr(expr_pair)?;
|
||||||
|
let filter = match pairs.next() {
|
||||||
|
Some(p) if p.as_rule() == Rule::inline_where => Some(build_filter(p)?),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
Ok(Expr::Agg(func, Box::new(inner_expr), filter))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_agg_func(s: &str) -> Result<AggFunc> {
|
||||||
|
match s.to_ascii_uppercase().as_str() {
|
||||||
|
"SUM" => Ok(AggFunc::Sum),
|
||||||
|
"AVG" => Ok(AggFunc::Avg),
|
||||||
|
"MIN" => Ok(AggFunc::Min),
|
||||||
|
"MAX" => Ok(AggFunc::Max),
|
||||||
|
"COUNT" => Ok(AggFunc::Count),
|
||||||
|
f => Err(anyhow!("unknown aggregate function: {}", f)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_if_expr(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
// if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" }
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
let cond_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing IF condition"))?;
|
||||||
|
let cond = build_comparison(cond_pair)?;
|
||||||
|
let then_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing IF then-branch"))?;
|
||||||
|
let then_e = build_expr(then_pair)?;
|
||||||
|
let else_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing IF else-branch"))?;
|
||||||
|
let else_e = build_expr(else_pair)?;
|
||||||
|
Ok(Expr::If(
|
||||||
|
Box::new(cond),
|
||||||
|
Box::new(then_e),
|
||||||
|
Box::new(else_e),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_comparison(pair: Pair<Rule>) -> Result<Expr> {
|
||||||
|
// comparison = { expr ~ cmp_op ~ expr }
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
let lhs_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing comparison lhs"))?;
|
||||||
|
let lhs = build_expr(lhs_pair)?;
|
||||||
|
let op_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing comparison operator"))?;
|
||||||
|
let op = parse_cmp_op(op_pair.as_str())?;
|
||||||
|
let rhs_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing comparison rhs"))?;
|
||||||
|
let rhs = build_expr(rhs_pair)?;
|
||||||
|
Ok(Expr::BinOp(op, Box::new(lhs), Box::new(rhs)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_cmp_op(s: &str) -> Result<BinOp> {
|
||||||
|
match s {
|
||||||
|
"=" => Ok(BinOp::Eq),
|
||||||
|
"!=" => Ok(BinOp::Ne),
|
||||||
|
"<" => Ok(BinOp::Lt),
|
||||||
|
">" => Ok(BinOp::Gt),
|
||||||
|
"<=" => Ok(BinOp::Le),
|
||||||
|
">=" => Ok(BinOp::Ge),
|
||||||
|
o => Err(anyhow!("unknown comparison operator: {}", o)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_filter(pair: Pair<Rule>) -> Result<Filter> {
|
||||||
|
// where_clause / inline_where both have shape:
|
||||||
|
// ^"WHERE" ~ identifier ~ "=" ~ filter_value
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
let cat_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing WHERE category"))?;
|
||||||
|
let category = identifier_to_string(cat_pair);
|
||||||
|
let val_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing WHERE value"))?;
|
||||||
|
let item = filter_value_to_string(val_pair);
|
||||||
|
Ok(Filter { category, item })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn filter_value_to_string(pair: Pair<Rule>) -> String {
|
||||||
|
// filter_value = { string | pipe_quoted | bare_ident }
|
||||||
|
let inner = pair
|
||||||
|
.into_inner()
|
||||||
|
.next()
|
||||||
|
.expect("filter_value must have an inner pair");
|
||||||
|
let s = inner.as_str();
|
||||||
|
match inner.as_rule() {
|
||||||
|
Rule::string => strip_string_quotes(s),
|
||||||
|
Rule::pipe_quoted => unquote_pipe(s),
|
||||||
|
_ => s.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert an identifier pair (identifier, pipe_quoted, or bare_ident) to
|
||||||
|
/// its content string. Pipe-quoted identifiers have their delimiters
|
||||||
|
/// stripped and backslash escapes applied; bare identifiers are returned
|
||||||
|
/// verbatim.
|
||||||
|
fn identifier_to_string(pair: Pair<Rule>) -> String {
|
||||||
|
let s = pair.as_str();
|
||||||
|
if is_pipe_quoted(s) {
|
||||||
|
unquote_pipe(s)
|
||||||
} else {
|
} else {
|
||||||
s.to_string()
|
s.to_string()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_where(s: &str) -> Result<Filter> {
|
fn is_pipe_quoted(s: &str) -> bool {
|
||||||
// Format: Category = "Item" or Category = |Item| or Category = Item
|
s.len() >= 2 && s.starts_with('|') && s.ends_with('|')
|
||||||
let eq_pos = s
|
|
||||||
.find('=')
|
|
||||||
.ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?;
|
|
||||||
let category = unquote(&s[..eq_pos]);
|
|
||||||
let item = unquote(&s[eq_pos + 1..]);
|
|
||||||
Ok(Filter { category, item })
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse an expression using recursive descent
|
fn strip_string_quotes(s: &str) -> String {
|
||||||
pub fn parse_expr(s: &str) -> Result<Expr> {
|
debug_assert!(s.len() >= 2 && s.starts_with('"') && s.ends_with('"'));
|
||||||
let tokens = tokenize(s)?;
|
s[1..s.len() - 1].to_string()
|
||||||
let mut pos = 0;
|
|
||||||
let expr = parse_add_sub(&tokens, &mut pos)?;
|
|
||||||
if pos < tokens.len() {
|
|
||||||
return Err(anyhow!(
|
|
||||||
"Unexpected token at position {pos}: {:?}",
|
|
||||||
tokens[pos]
|
|
||||||
));
|
|
||||||
}
|
|
||||||
Ok(expr)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
/// Strip surrounding pipes and apply backslash escapes: `\|` → `|`,
|
||||||
enum Token {
|
/// `\\` → `\`, `\n` → newline. Matches the escape semantics documented
|
||||||
Number(f64),
|
/// in src/persistence/improv.pest.
|
||||||
Ident(String),
|
fn unquote_pipe(s: &str) -> String {
|
||||||
Str(String),
|
debug_assert!(is_pipe_quoted(s));
|
||||||
Plus,
|
let inner = &s[1..s.len() - 1];
|
||||||
Minus,
|
let mut out = String::with_capacity(inner.len());
|
||||||
Star,
|
let mut chars = inner.chars();
|
||||||
Slash,
|
while let Some(c) = chars.next() {
|
||||||
Caret,
|
if c == '\\' {
|
||||||
LParen,
|
match chars.next() {
|
||||||
RParen,
|
Some('|') => out.push('|'),
|
||||||
Comma,
|
Some('\\') => out.push('\\'),
|
||||||
Eq,
|
Some('n') => out.push('\n'),
|
||||||
Ne,
|
Some(other) => {
|
||||||
Lt,
|
out.push('\\');
|
||||||
Gt,
|
out.push(other);
|
||||||
Le,
|
|
||||||
Ge,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn tokenize(s: &str) -> Result<Vec<Token>> {
|
|
||||||
let mut tokens = Vec::new();
|
|
||||||
let chars: Vec<char> = s.chars().collect();
|
|
||||||
let mut i = 0;
|
|
||||||
|
|
||||||
while i < chars.len() {
|
|
||||||
match chars[i] {
|
|
||||||
' ' | '\t' | '\n' => i += 1,
|
|
||||||
'+' => {
|
|
||||||
tokens.push(Token::Plus);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'-' => {
|
|
||||||
tokens.push(Token::Minus);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'*' => {
|
|
||||||
tokens.push(Token::Star);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'/' => {
|
|
||||||
tokens.push(Token::Slash);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'^' => {
|
|
||||||
tokens.push(Token::Caret);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'(' => {
|
|
||||||
tokens.push(Token::LParen);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
')' => {
|
|
||||||
tokens.push(Token::RParen);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
',' => {
|
|
||||||
tokens.push(Token::Comma);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'!' if chars.get(i + 1) == Some(&'=') => {
|
|
||||||
tokens.push(Token::Ne);
|
|
||||||
i += 2;
|
|
||||||
}
|
|
||||||
'<' if chars.get(i + 1) == Some(&'=') => {
|
|
||||||
tokens.push(Token::Le);
|
|
||||||
i += 2;
|
|
||||||
}
|
|
||||||
'>' if chars.get(i + 1) == Some(&'=') => {
|
|
||||||
tokens.push(Token::Ge);
|
|
||||||
i += 2;
|
|
||||||
}
|
|
||||||
'<' => {
|
|
||||||
tokens.push(Token::Lt);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'>' => {
|
|
||||||
tokens.push(Token::Gt);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'=' => {
|
|
||||||
tokens.push(Token::Eq);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
'"' => {
|
|
||||||
i += 1;
|
|
||||||
let mut s = String::new();
|
|
||||||
while i < chars.len() && chars[i] != '"' {
|
|
||||||
s.push(chars[i]);
|
|
||||||
i += 1;
|
|
||||||
}
|
}
|
||||||
if i < chars.len() {
|
None => out.push('\\'),
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
tokens.push(Token::Str(s));
|
|
||||||
}
|
}
|
||||||
'|' => {
|
} else {
|
||||||
i += 1;
|
out.push(c);
|
||||||
let mut s = String::new();
|
|
||||||
while i < chars.len() && chars[i] != '|' {
|
|
||||||
s.push(chars[i]);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
if i < chars.len() {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
tokens.push(Token::Ident(s));
|
|
||||||
}
|
|
||||||
c if c.is_ascii_digit() || c == '.' => {
|
|
||||||
let mut num = String::new();
|
|
||||||
while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
|
|
||||||
num.push(chars[i]);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
tokens.push(Token::Number(num.parse()?));
|
|
||||||
}
|
|
||||||
c if c.is_alphabetic() || c == '_' => {
|
|
||||||
let mut ident = String::new();
|
|
||||||
while i < chars.len()
|
|
||||||
&& (chars[i].is_alphanumeric() || chars[i] == '_' || chars[i] == ' ')
|
|
||||||
{
|
|
||||||
// Don't consume trailing spaces if next non-space is operator
|
|
||||||
if chars[i] == ' ' {
|
|
||||||
// Peek ahead past spaces to find the next word/token
|
|
||||||
let j = i + 1;
|
|
||||||
let next_nonspace = chars[j..].iter().find(|&&c| c != ' ');
|
|
||||||
if matches!(
|
|
||||||
next_nonspace,
|
|
||||||
Some('+')
|
|
||||||
| Some('-')
|
|
||||||
| Some('*')
|
|
||||||
| Some('/')
|
|
||||||
| Some('^')
|
|
||||||
| Some(')')
|
|
||||||
| Some(',')
|
|
||||||
| Some('<')
|
|
||||||
| Some('>')
|
|
||||||
| Some('=')
|
|
||||||
| Some('!')
|
|
||||||
| Some('"')
|
|
||||||
| None
|
|
||||||
) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Break if the identifier collected so far is a keyword
|
|
||||||
let trimmed = ident.trim_end().to_ascii_uppercase();
|
|
||||||
if matches!(
|
|
||||||
trimmed.as_str(),
|
|
||||||
"WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF"
|
|
||||||
) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Also break if the next word is a keyword
|
|
||||||
let rest: String = chars[j..].iter().collect();
|
|
||||||
let next_word: String = rest
|
|
||||||
.trim_start()
|
|
||||||
.chars()
|
|
||||||
.take_while(|c| c.is_alphanumeric() || *c == '_')
|
|
||||||
.collect();
|
|
||||||
let upper = next_word.to_ascii_uppercase();
|
|
||||||
if matches!(
|
|
||||||
upper.as_str(),
|
|
||||||
"WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF"
|
|
||||||
) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ident.push(chars[i]);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
let ident = ident.trim_end().to_string();
|
|
||||||
tokens.push(Token::Ident(ident));
|
|
||||||
}
|
|
||||||
c => return Err(anyhow!("Unexpected character '{c}' in expression")),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(tokens)
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_add_sub(tokens: &[Token], pos: &mut usize) -> Result<Expr> {
|
// ---- small helpers ------------------------------------------------------
|
||||||
let mut left = parse_mul_div(tokens, pos)?;
|
|
||||||
while *pos < tokens.len() {
|
fn first_inner<'a>(pair: Pair<'a, Rule>, ctx: &str) -> Result<Pair<'a, Rule>> {
|
||||||
let op = match &tokens[*pos] {
|
pair.into_inner()
|
||||||
Token::Plus => BinOp::Add,
|
.next()
|
||||||
Token::Minus => BinOp::Sub,
|
.ok_or_else(|| anyhow!("empty rule: {}", ctx))
|
||||||
_ => break,
|
}
|
||||||
};
|
|
||||||
*pos += 1;
|
/// Fold a left-associative binary-operator rule of the shape
|
||||||
let right = parse_mul_div(tokens, pos)?;
|
/// `rule = { child ~ (op ~ child)* }` into a left-leaning BinOp tree.
|
||||||
|
fn fold_left_binop<F, M>(pair: Pair<Rule>, mut build_child: F, match_op: M) -> Result<Expr>
|
||||||
|
where
|
||||||
|
F: FnMut(Pair<Rule>) -> Result<Expr>,
|
||||||
|
M: Fn(&str) -> Option<BinOp>,
|
||||||
|
{
|
||||||
|
let mut pairs = pair.into_inner();
|
||||||
|
let first = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("empty binop rule"))?;
|
||||||
|
let mut left = build_child(first)?;
|
||||||
|
while let Some(op_pair) = pairs.next() {
|
||||||
|
let op = match_op(op_pair.as_str()).ok_or_else(|| {
|
||||||
|
anyhow!("unexpected operator token: {:?}", op_pair.as_str())
|
||||||
|
})?;
|
||||||
|
let right_pair = pairs
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow!("missing rhs for operator"))?;
|
||||||
|
let right = build_child(right_pair)?;
|
||||||
left = Expr::BinOp(op, Box::new(left), Box::new(right));
|
left = Expr::BinOp(op, Box::new(left), Box::new(right));
|
||||||
}
|
}
|
||||||
Ok(left)
|
Ok(left)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_mul_div(tokens: &[Token], pos: &mut usize) -> Result<Expr> {
|
|
||||||
let mut left = parse_pow(tokens, pos)?;
|
|
||||||
while *pos < tokens.len() {
|
|
||||||
let op = match &tokens[*pos] {
|
|
||||||
Token::Star => BinOp::Mul,
|
|
||||||
Token::Slash => BinOp::Div,
|
|
||||||
_ => break,
|
|
||||||
};
|
|
||||||
*pos += 1;
|
|
||||||
let right = parse_pow(tokens, pos)?;
|
|
||||||
left = Expr::BinOp(op, Box::new(left), Box::new(right));
|
|
||||||
}
|
|
||||||
Ok(left)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_pow(tokens: &[Token], pos: &mut usize) -> Result<Expr> {
|
|
||||||
let base = parse_unary(tokens, pos)?;
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::Caret {
|
|
||||||
*pos += 1;
|
|
||||||
let exp = parse_unary(tokens, pos)?;
|
|
||||||
return Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp)));
|
|
||||||
}
|
|
||||||
Ok(base)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_unary(tokens: &[Token], pos: &mut usize) -> Result<Expr> {
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::Minus {
|
|
||||||
*pos += 1;
|
|
||||||
let e = parse_primary(tokens, pos)?;
|
|
||||||
return Ok(Expr::UnaryMinus(Box::new(e)));
|
|
||||||
}
|
|
||||||
parse_primary(tokens, pos)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_primary(tokens: &[Token], pos: &mut usize) -> Result<Expr> {
|
|
||||||
if *pos >= tokens.len() {
|
|
||||||
return Err(anyhow!("Unexpected end of expression"));
|
|
||||||
}
|
|
||||||
match &tokens[*pos].clone() {
|
|
||||||
Token::Number(n) => {
|
|
||||||
*pos += 1;
|
|
||||||
Ok(Expr::Number(*n))
|
|
||||||
}
|
|
||||||
Token::Ident(name) => {
|
|
||||||
let name = name.clone();
|
|
||||||
*pos += 1;
|
|
||||||
// Check for function call
|
|
||||||
let upper = name.to_ascii_uppercase();
|
|
||||||
match upper.as_str() {
|
|
||||||
"SUM" | "AVG" | "MIN" | "MAX" | "COUNT" => {
|
|
||||||
let func = match upper.as_str() {
|
|
||||||
"SUM" => AggFunc::Sum,
|
|
||||||
"AVG" => AggFunc::Avg,
|
|
||||||
"MIN" => AggFunc::Min,
|
|
||||||
"MAX" => AggFunc::Max,
|
|
||||||
"COUNT" => AggFunc::Count,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::LParen {
|
|
||||||
*pos += 1;
|
|
||||||
let inner = parse_add_sub(tokens, pos)?;
|
|
||||||
// Optional WHERE filter
|
|
||||||
let filter = if *pos < tokens.len() {
|
|
||||||
if let Token::Ident(kw) = &tokens[*pos] {
|
|
||||||
if kw.eq_ignore_ascii_case("WHERE") {
|
|
||||||
*pos += 1;
|
|
||||||
let cat = match &tokens[*pos] {
|
|
||||||
Token::Ident(s) => {
|
|
||||||
let s = s.clone();
|
|
||||||
*pos += 1;
|
|
||||||
s
|
|
||||||
}
|
|
||||||
t => {
|
|
||||||
return Err(anyhow!(
|
|
||||||
"Expected category name, got {t:?}"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// expect =
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::Eq {
|
|
||||||
*pos += 1;
|
|
||||||
}
|
|
||||||
let item = match &tokens[*pos] {
|
|
||||||
Token::Str(s) | Token::Ident(s) => {
|
|
||||||
let s = s.clone();
|
|
||||||
*pos += 1;
|
|
||||||
s
|
|
||||||
}
|
|
||||||
t => return Err(anyhow!("Expected item name, got {t:?}")),
|
|
||||||
};
|
|
||||||
Some(Filter {
|
|
||||||
category: cat,
|
|
||||||
item,
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
// expect )
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::RParen {
|
|
||||||
*pos += 1;
|
|
||||||
} else {
|
|
||||||
return Err(anyhow!("Expected ')' to close aggregate function"));
|
|
||||||
}
|
|
||||||
return Ok(Expr::Agg(func, Box::new(inner), filter));
|
|
||||||
}
|
|
||||||
Ok(Expr::Ref(name))
|
|
||||||
}
|
|
||||||
"IF" => {
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::LParen {
|
|
||||||
*pos += 1;
|
|
||||||
let cond = parse_comparison(tokens, pos)?;
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::Comma {
|
|
||||||
*pos += 1;
|
|
||||||
}
|
|
||||||
let then = parse_add_sub(tokens, pos)?;
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::Comma {
|
|
||||||
*pos += 1;
|
|
||||||
}
|
|
||||||
let else_ = parse_add_sub(tokens, pos)?;
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::RParen {
|
|
||||||
*pos += 1;
|
|
||||||
} else {
|
|
||||||
return Err(anyhow!("Expected ')' to close IF(...)"));
|
|
||||||
}
|
|
||||||
return Ok(Expr::If(Box::new(cond), Box::new(then), Box::new(else_)));
|
|
||||||
}
|
|
||||||
Ok(Expr::Ref(name))
|
|
||||||
}
|
|
||||||
_ => Ok(Expr::Ref(name)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Token::LParen => {
|
|
||||||
*pos += 1;
|
|
||||||
let e = parse_add_sub(tokens, pos)?;
|
|
||||||
if *pos < tokens.len() && tokens[*pos] == Token::RParen {
|
|
||||||
*pos += 1;
|
|
||||||
}
|
|
||||||
Ok(e)
|
|
||||||
}
|
|
||||||
t => Err(anyhow!("Unexpected token in expression: {t:?}")),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_comparison(tokens: &[Token], pos: &mut usize) -> Result<Expr> {
|
|
||||||
let left = parse_add_sub(tokens, pos)?;
|
|
||||||
if *pos >= tokens.len() {
|
|
||||||
return Ok(left);
|
|
||||||
}
|
|
||||||
let op = match &tokens[*pos] {
|
|
||||||
Token::Eq => BinOp::Eq,
|
|
||||||
Token::Ne => BinOp::Ne,
|
|
||||||
Token::Lt => BinOp::Lt,
|
|
||||||
Token::Gt => BinOp::Gt,
|
|
||||||
Token::Le => BinOp::Le,
|
|
||||||
Token::Ge => BinOp::Ge,
|
|
||||||
_ => return Ok(left),
|
|
||||||
};
|
|
||||||
*pos += 1;
|
|
||||||
let right = parse_add_sub(tokens, pos)?;
|
|
||||||
Ok(Expr::BinOp(op, Box::new(left), Box::new(right)))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::parse_formula;
|
use super::parse_formula;
|
||||||
@ -544,8 +403,8 @@ mod tests {
|
|||||||
assert_eq!(filter.item, "East");
|
assert_eq!(filter.item, "East");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Regression: WHERE inside aggregate parens must tokenize correctly.
|
/// Regression: WHERE inside aggregate parens must parse as the
|
||||||
/// The tokenizer must not merge "Revenue WHERE" into a single identifier.
|
/// aggregate's inline filter, not as a top-level WHERE clause.
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_sum_with_inline_where_filter() {
|
fn parse_sum_with_inline_where_filter() {
|
||||||
let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap();
|
let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap();
|
||||||
@ -562,7 +421,6 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_if_with_comparison_operators() {
|
fn parse_if_with_comparison_operators() {
|
||||||
// Test each comparison operator in an IF expression
|
|
||||||
let f = parse_formula("X = IF(A != 0, A, 1)", "Cat").unwrap();
|
let f = parse_formula("X = IF(A != 0, A, 1)", "Cat").unwrap();
|
||||||
assert!(matches!(f.expr, Expr::If(_, _, _)));
|
assert!(matches!(f.expr, Expr::If(_, _, _)));
|
||||||
|
|
||||||
@ -583,7 +441,6 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_where_with_quoted_string_inside_expression() {
|
fn parse_where_with_quoted_string_inside_expression() {
|
||||||
// WHERE inside a formula string with quotes
|
|
||||||
let f = parse_formula("X = Revenue WHERE Region = \"West Coast\"", "Foo").unwrap();
|
let f = parse_formula("X = Revenue WHERE Region = \"West Coast\"", "Foo").unwrap();
|
||||||
let filter = f.filter.as_ref().unwrap();
|
let filter = f.filter.as_ref().unwrap();
|
||||||
assert_eq!(filter.item, "West Coast");
|
assert_eq!(filter.item, "West Coast");
|
||||||
@ -650,11 +507,10 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Quoted string in tokenizer ──────────────────────────────────────
|
// ── Quoted string in WHERE ──────────────────────────────────────────
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_quoted_string_in_where() {
|
fn parse_quoted_string_in_where() {
|
||||||
// Quoted strings work in top-level WHERE clauses
|
|
||||||
let f = parse_formula("X = Revenue WHERE Region = \"East\"", "Cat").unwrap();
|
let f = parse_formula("X = Revenue WHERE Region = \"East\"", "Cat").unwrap();
|
||||||
let filter = f.filter.as_ref().unwrap();
|
let filter = f.filter.as_ref().unwrap();
|
||||||
assert_eq!(filter.item, "East");
|
assert_eq!(filter.item, "East");
|
||||||
@ -681,27 +537,21 @@ mod tests {
|
|||||||
assert!(parse_expr("").is_err());
|
assert!(parse_expr("").is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Multi-word identifiers must be pipe-quoted ──────────────────────
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenizer_breaks_at_where_keyword() {
|
fn multi_word_bare_identifier_is_rejected() {
|
||||||
use super::tokenize;
|
// Multi-word identifiers must be pipe-quoted; bare multi-word fails
|
||||||
let tokens = tokenize("Revenue WHERE Region").unwrap();
|
// the `bare_name`-compatible grammar rule.
|
||||||
// Should produce 3 tokens: Ident("Revenue"), Ident("WHERE"), Ident("Region")
|
assert!(parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").is_err());
|
||||||
assert_eq!(tokens.len(), 3, "Expected 3 tokens, got: {tokens:?}");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Multi-word identifiers ──────────────────────────────────────────
|
// ── WHERE inside quotes in the expression ───────────────────────────
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_multi_word_identifier() {
|
fn where_inside_quotes_is_not_a_keyword() {
|
||||||
let f = parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").unwrap();
|
// A filter value containing the literal text "WHERE" is parsed as
|
||||||
assert_eq!(f.target, "Total Revenue");
|
// a string, not as a nested WHERE keyword.
|
||||||
}
|
|
||||||
|
|
||||||
// ── WHERE inside quotes in split_where ──────────────────────────────
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn split_where_ignores_where_inside_quotes() {
|
|
||||||
// WHERE inside quotes should not be treated as a keyword
|
|
||||||
let f = parse_formula("X = Revenue WHERE Region = \"WHERE\"", "Foo").unwrap();
|
let f = parse_formula("X = Revenue WHERE Region = \"WHERE\"", "Foo").unwrap();
|
||||||
let filter = f.filter.as_ref().unwrap();
|
let filter = f.filter.as_ref().unwrap();
|
||||||
assert_eq!(filter.item, "WHERE");
|
assert_eq!(filter.item, "WHERE");
|
||||||
@ -773,4 +623,17 @@ mod tests {
|
|||||||
panic!("Expected SUM with WHERE filter, got: {:?}", f.expr);
|
panic!("Expected SUM with WHERE filter, got: {:?}", f.expr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Pipe-quoted escape semantics ────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn pipe_quoted_escape_literal_pipe() {
|
||||||
|
// \| inside a pipe-quoted identifier is a literal pipe
|
||||||
|
let f = parse_formula("X = |A\\|B|", "Cat").unwrap();
|
||||||
|
if let Expr::Ref(ref s) = f.expr {
|
||||||
|
assert_eq!(s, "A|B");
|
||||||
|
} else {
|
||||||
|
panic!("Expected Ref, got: {:?}", f.expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user