feat(formula): support pipe-quoted identifiers |...|

Add CL/SQL-style symbol quoting using pipe delimiters for formula
identifiers. This allows category and item names that collide with
keywords (WHERE, SUM, IF, etc.) or contain special characters
(parens, operators, spaces) to be used unambiguously in formulas:

  |WHERE| + |Revenue (USD)|
  SUM(|Net Revenue| WHERE |Region Name| = |East Coast|)

Pipes produce Token::Ident (same as bare identifiers), so they work
everywhere: expressions, aggregates, WHERE clauses. Double-quoted
strings remain Token::Str for backward compatibility.

Also updates split_where and parse_where to skip/strip pipe delimiters.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Edward Langley
2026-04-08 23:48:30 -07:00
parent 637178f3f6
commit ab7e00a217
2 changed files with 125 additions and 13 deletions

View File

@ -115,17 +115,28 @@ Formulas are parsed into a typed AST (`Expr` enum) at entry time. If the syntax
is invalid, the user gets an error immediately. The evaluator only sees is invalid, the user gets an error immediately. The evaluator only sees
well-formed trees — it does not need to handle malformed input. well-formed trees — it does not need to handle malformed input.
### Formula Tokenizer: Multi-Word Identifiers and Keywords ### Formula Tokenizer: Identifiers and Quoting
The formula tokenizer supports multi-word identifiers (e.g., `Total Revenue`) **Bare identifiers** support multi-word names (e.g., `Total Revenue`) by
by allowing spaces within identifier tokens when followed by non-operator allowing spaces when followed by non-operator, non-keyword characters. Keywords
characters. However, keywords (`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`, (`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`, `IF`) act as token boundaries.
`IF`) act as token boundaries — the tokenizer breaks an identifier when:
1. The identifier collected **so far** is a keyword (e.g., `WHERE ` stops at `WHERE`).
2. The **next word** after a space is a keyword (e.g., `Revenue WHERE` stops at `Revenue`).
This ensures `SUM(Revenue WHERE Region = "East")` tokenizes correctly as **Pipe-quoted identifiers** (`|...|`) allow any characters — including spaces,
separate tokens while `Total Revenue` remains a single identifier. keywords, and operators — inside the delimiters. Use pipes when a category or
item name collides with a keyword or contains special characters:
```
|WHERE| — category named "WHERE"
|Revenue (USD)| — name with parens
|Cost + Tax| — name with operator chars
SUM(|Net Revenue| WHERE |Region Name| = |East Coast|)
```
Pipes produce `Token::Ident` (same as bare identifiers), so they work
everywhere an identifier is expected: expressions, aggregate arguments, WHERE
clause category names and filter values. Double-quoted strings (`"..."`)
remain `Token::Str`; they are meant only for WHERE filter values, which are
handled by the `split_where` / `parse_where` pre-parse step.
--- ---

View File

@ -38,6 +38,12 @@ fn split_where(s: &str) -> (&str, Option<&str>) {
i += 1; i += 1;
} }
} }
b'|' => {
i += 1;
while i < bytes.len() && bytes[i] != b'|' {
i += 1;
}
}
_ if depth == 0 => { _ if depth == 0 => {
if s[i..].to_ascii_uppercase().starts_with("WHERE") { if s[i..].to_ascii_uppercase().starts_with("WHERE") {
let before = &s[..i]; let before = &s[..i];
@ -54,14 +60,23 @@ fn split_where(s: &str) -> (&str, Option<&str>) {
(s, None) (s, None)
} }
/// Strip one pair of matching pipe (`|...|`) or double-quote (`"..."`)
/// delimiters from a value.
///
/// Surrounding whitespace is trimmed first. The delimiters are removed only
/// when the trimmed text both starts and ends with the same delimiter and the
/// two are distinct characters; otherwise the trimmed text is returned as-is.
/// In particular a lone `|` or `"` is returned unchanged instead of being
/// sliced out of bounds.
///
/// Returns the unquoted text as an owned `String`.
fn unquote(s: &str) -> String {
    let s = s.trim();
    // `strip_prefix` followed by `strip_suffix` on the remainder can only
    // succeed when there is an opening AND a separate closing delimiter.
    let inner = s
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| s.strip_prefix('|').and_then(|rest| rest.strip_suffix('|')));
    inner.unwrap_or(s).to_string()
}
fn parse_where(s: &str) -> Result<Filter> { fn parse_where(s: &str) -> Result<Filter> {
// Format: Category = "Item" or Category = Item // Format: Category = "Item" or Category = |Item| or Category = Item
let eq_pos = s let eq_pos = s
.find('=') .find('=')
.ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?; .ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?;
let category = s[..eq_pos].trim().to_string(); let category = unquote(&s[..eq_pos]);
let item_raw = s[eq_pos + 1..].trim(); let item = unquote(&s[eq_pos + 1..]);
let item = item_raw.trim_matches('"').to_string();
Ok(Filter { category, item }) Ok(Filter { category, item })
} }
@ -176,6 +191,18 @@ fn tokenize(s: &str) -> Result<Vec<Token>> {
} }
tokens.push(Token::Str(s)); tokens.push(Token::Str(s));
} }
'|' => {
i += 1;
let mut s = String::new();
while i < chars.len() && chars[i] != '|' {
s.push(chars[i]);
i += 1;
}
if i < chars.len() {
i += 1;
}
tokens.push(Token::Ident(s));
}
c if c.is_ascii_digit() || c == '.' => { c if c.is_ascii_digit() || c == '.' => {
let mut num = String::new(); let mut num = String::new();
while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') { while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
@ -695,4 +722,78 @@ mod tests {
let filter = f.filter.as_ref().unwrap(); let filter = f.filter.as_ref().unwrap();
assert_eq!(filter.item, "WHERE"); assert_eq!(filter.item, "WHERE");
} }
// ── Pipe-quoted identifiers ─────────────────────────────────────────
#[test]
fn pipe_quoted_identifier_in_expression() {
    // Pipe-quoted and bare references can be mixed in one expression. The
    // target is asserted to come back with its pipes still attached.
    let formula = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Measure").unwrap();
    assert_eq!(formula.target, "|Total Revenue|");
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(**left, Expr::Ref(ref name) if name == "Base Revenue"));
            assert!(matches!(**right, Expr::Ref(ref name) if name == "Bonus"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
#[test]
fn pipe_quoted_keyword_as_identifier() {
    // Names that collide with keywords (WHERE, SUM) become plain references
    // once they are wrapped in pipes.
    let formula = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap();
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(**left, Expr::Ref(ref name) if name == "WHERE"));
            assert!(matches!(**right, Expr::Ref(ref name) if name == "SUM"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
#[test]
fn pipe_quoted_identifier_with_special_chars() {
    // Parentheses and operator characters inside pipes are part of the name
    // rather than being tokenized on their own.
    let formula = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap();
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(**left, Expr::Ref(ref name) if name == "Revenue (USD)"));
            assert!(matches!(**right, Expr::Ref(ref name) if name == "Cost + Tax"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
#[test]
fn pipe_quoted_in_aggregate() {
    // A pipe-quoted name is accepted as the argument of an aggregate call.
    let formula = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap();
    match &formula.expr {
        Expr::Agg(AggFunc::Sum, arg, None) => {
            assert!(matches!(**arg, Expr::Ref(ref name) if name == "Net Revenue"));
        }
        other => panic!("Expected SUM aggregate, got: {:?}", other),
    }
}
#[test]
fn pipe_quoted_in_where_filter_value() {
    // The value of a trailing WHERE filter may be pipe-quoted; the pipes are
    // stripped from the stored item.
    let source = "X = Revenue WHERE Region = |East Coast|";
    let formula = parse_formula(source, "Measure").unwrap();
    let where_filter = formula.filter.as_ref().unwrap();
    assert_eq!(where_filter.item, "East Coast");
}
#[test]
fn pipe_quoted_in_inline_where() {
    // Inside an aggregate, both the WHERE category and its value may be
    // pipe-quoted; each is stored without its pipes.
    let source = "X = SUM(Revenue WHERE |Region Name| = |East Coast|)";
    let formula = parse_formula(source, "Measure").unwrap();
    match &formula.expr {
        Expr::Agg(AggFunc::Sum, _, Some(where_filter)) => {
            assert_eq!(where_filter.category, "Region Name");
            assert_eq!(where_filter.item, "East Coast");
        }
        other => panic!("Expected SUM with WHERE filter, got: {:?}", other),
    }
}
} }