feat(formula): support pipe-quoted identifiers |...|
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -115,17 +115,28 @@ Formulas are parsed into a typed AST (`Expr` enum) at entry time. If the syntax
|
||||
is invalid, the user gets an error immediately. The evaluator only sees
|
||||
well-formed trees — it does not need to handle malformed input.
|
||||
|
||||
### Formula Tokenizer: Multi-Word Identifiers and Keywords
|
||||
### Formula Tokenizer: Identifiers and Quoting
|
||||
|
||||
The formula tokenizer supports multi-word identifiers (e.g., `Total Revenue`)
|
||||
by allowing spaces within identifier tokens when followed by non-operator
|
||||
characters. However, keywords (`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`,
|
||||
`IF`) act as token boundaries — the tokenizer breaks an identifier when:
|
||||
1. The identifier collected **so far** is a keyword (e.g., `WHERE ` stops at `WHERE`).
|
||||
2. The **next word** after a space is a keyword (e.g., `Revenue WHERE` stops at `Revenue`).
|
||||
**Bare identifiers** support multi-word names (e.g., `Total Revenue`) by
|
||||
allowing a space when the text after it is neither an operator nor a keyword. Keywords
|
||||
(`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`, `IF`) act as token boundaries.
|
||||
|
||||
This ensures `SUM(Revenue WHERE Region = "East")` tokenizes correctly as
|
||||
separate tokens while `Total Revenue` remains a single identifier.
|
||||
**Pipe-quoted identifiers** (`|...|`) allow any characters — including spaces,
|
||||
keywords, and operators — inside the delimiters. Use pipes when a category or
|
||||
item name collides with a keyword or contains special characters:
|
||||
|
||||
```
|
||||
|WHERE| — category named "WHERE"
|
||||
|Revenue (USD)| — name with parens
|
||||
|Cost + Tax| — name with operator chars
|
||||
SUM(|Net Revenue| WHERE |Region Name| = |East Coast|)
|
||||
```
|
||||
|
||||
Pipes produce `Token::Ident` (same as bare identifiers), so they work
|
||||
everywhere an identifier is expected: expressions, aggregate arguments, WHERE
|
||||
clause category names and filter values. Double-quoted strings (`"..."`)
|
||||
remain `Token::Str` and are used only for WHERE filter values in the
|
||||
`split_where` pre-parse step.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@ -38,6 +38,12 @@ fn split_where(s: &str) -> (&str, Option<&str>) {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
b'|' => {
|
||||
i += 1;
|
||||
while i < bytes.len() && bytes[i] != b'|' {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
_ if depth == 0 => {
|
||||
if s[i..].to_ascii_uppercase().starts_with("WHERE") {
|
||||
let before = &s[..i];
|
||||
@ -54,14 +60,23 @@ fn split_where(s: &str) -> (&str, Option<&str>) {
|
||||
(s, None)
|
||||
}
|
||||
|
||||
/// Strip one pair of matching pipe (`|...|`) or double-quote (`"..."`)
/// delimiters from a value.
///
/// The input is trimmed of surrounding whitespace first. If the trimmed text
/// both starts and ends with the same delimiter (two distinct characters),
/// the text between them is returned; otherwise the trimmed text is returned
/// unchanged. Content inside the delimiters is not trimmed.
///
/// A lone `"` or `|` is returned as-is: a single character cannot be both the
/// opening and the closing delimiter.
fn unquote(s: &str) -> String {
    let trimmed = s.trim();
    // `strip_prefix` followed by `strip_suffix` requires two separate
    // delimiter characters, so a one-character input falls through to the
    // unquoted case instead of producing an inverted slice range.
    let inner = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            trimmed
                .strip_prefix('|')
                .and_then(|rest| rest.strip_suffix('|'))
        });
    inner.unwrap_or(trimmed).to_string()
}
|
||||
|
||||
fn parse_where(s: &str) -> Result<Filter> {
|
||||
// Format: Category = "Item" or Category = Item
|
||||
// Format: Category = "Item" or Category = |Item| or Category = Item
|
||||
let eq_pos = s
|
||||
.find('=')
|
||||
.ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?;
|
||||
let category = s[..eq_pos].trim().to_string();
|
||||
let item_raw = s[eq_pos + 1..].trim();
|
||||
let item = item_raw.trim_matches('"').to_string();
|
||||
let category = unquote(&s[..eq_pos]);
|
||||
let item = unquote(&s[eq_pos + 1..]);
|
||||
Ok(Filter { category, item })
|
||||
}
|
||||
|
||||
@ -176,6 +191,18 @@ fn tokenize(s: &str) -> Result<Vec<Token>> {
|
||||
}
|
||||
tokens.push(Token::Str(s));
|
||||
}
|
||||
'|' => {
|
||||
i += 1;
|
||||
let mut s = String::new();
|
||||
while i < chars.len() && chars[i] != '|' {
|
||||
s.push(chars[i]);
|
||||
i += 1;
|
||||
}
|
||||
if i < chars.len() {
|
||||
i += 1;
|
||||
}
|
||||
tokens.push(Token::Ident(s));
|
||||
}
|
||||
c if c.is_ascii_digit() || c == '.' => {
|
||||
let mut num = String::new();
|
||||
while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
|
||||
@ -695,4 +722,78 @@ mod tests {
|
||||
let filter = f.filter.as_ref().unwrap();
|
||||
assert_eq!(filter.item, "WHERE");
|
||||
}
|
||||
|
||||
// ── Pipe-quoted identifiers ─────────────────────────────────────────
|
||||
|
||||
#[test]
fn pipe_quoted_identifier_in_expression() {
    // A pipe-quoted operand becomes a plain reference without the pipes; the
    // formula target is asserted exactly as written, pipes included.
    let formula = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Measure").unwrap();
    assert_eq!(formula.target, "|Total Revenue|");
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(&**left, Expr::Ref(name) if name == "Base Revenue"));
            assert!(matches!(&**right, Expr::Ref(name) if name == "Bonus"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
|
||||
|
||||
#[test]
fn pipe_quoted_keyword_as_identifier() {
    // Names that collide with keywords ("WHERE", "SUM") are usable as
    // references once wrapped in pipes.
    let formula = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap();
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(&**left, Expr::Ref(name) if name == "WHERE"));
            assert!(matches!(&**right, Expr::Ref(name) if name == "SUM"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
|
||||
|
||||
#[test]
fn pipe_quoted_identifier_with_special_chars() {
    // Parentheses and operator characters inside pipes belong to the name
    // rather than being tokenized on their own.
    let formula = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap();
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(&**left, Expr::Ref(name) if name == "Revenue (USD)"));
            assert!(matches!(&**right, Expr::Ref(name) if name == "Cost + Tax"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
|
||||
|
||||
#[test]
fn pipe_quoted_in_aggregate() {
    // A pipe-quoted name is accepted as the argument of an aggregate call.
    let formula = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap();
    match &formula.expr {
        Expr::Agg(AggFunc::Sum, argument, None) => {
            assert!(matches!(&**argument, Expr::Ref(name) if name == "Net Revenue"));
        }
        other => panic!("Expected SUM aggregate, got: {:?}", other),
    }
}
|
||||
|
||||
#[test]
fn pipe_quoted_in_where_filter_value() {
    // The filter value of a trailing WHERE clause may be pipe-quoted; the
    // pipes are removed from the stored item.
    let source = "X = Revenue WHERE Region = |East Coast|";
    let formula = parse_formula(source, "Measure").unwrap();
    let where_filter = formula.filter.as_ref().unwrap();
    assert_eq!(where_filter.item, "East Coast");
}
|
||||
|
||||
#[test]
fn pipe_quoted_in_inline_where() {
    // Inside an aggregate's inline WHERE, both the category and the value
    // may be pipe-quoted.
    let source = "X = SUM(Revenue WHERE |Region Name| = |East Coast|)";
    let formula = parse_formula(source, "Measure").unwrap();
    match &formula.expr {
        Expr::Agg(AggFunc::Sum, _, Some(inline_filter)) => {
            assert_eq!(inline_filter.category, "Region Name");
            assert_eq!(inline_filter.item, "East Coast");
        }
        other => panic!("Expected SUM with WHERE filter, got: {:?}", other),
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user