feat(formula): support pipe-quoted identifiers |...|
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -115,17 +115,28 @@ Formulas are parsed into a typed AST (`Expr` enum) at entry time. If the syntax
|
|||||||
is invalid, the user gets an error immediately. The evaluator only sees
|
is invalid, the user gets an error immediately. The evaluator only sees
|
||||||
well-formed trees — it does not need to handle malformed input.
|
well-formed trees — it does not need to handle malformed input.
|
||||||
|
|
||||||
### Formula Tokenizer: Multi-Word Identifiers and Keywords
|
### Formula Tokenizer: Identifiers and Quoting
|
||||||
|
|
||||||
The formula tokenizer supports multi-word identifiers (e.g., `Total Revenue`)
|
**Bare identifiers** support multi-word names (e.g., `Total Revenue`) by
|
||||||
by allowing spaces within identifier tokens when followed by non-operator
|
allowing spaces when followed by non-operator, non-keyword characters. Keywords
|
||||||
characters. However, keywords (`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`,
|
(`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`, `IF`) act as token boundaries.
|
||||||
`IF`) act as token boundaries — the tokenizer breaks an identifier when:
|
|
||||||
1. The identifier collected **so far** is a keyword (e.g., `WHERE ` stops at `WHERE`).
|
|
||||||
2. The **next word** after a space is a keyword (e.g., `Revenue WHERE` stops at `Revenue`).
|
|
||||||
|
|
||||||
This ensures `SUM(Revenue WHERE Region = "East")` tokenizes correctly as
|
**Pipe-quoted identifiers** (`|...|`) allow any characters — including spaces,
|
||||||
separate tokens while `Total Revenue` remains a single identifier.
|
keywords, and operators — inside the delimiters. Use pipes when a category or
|
||||||
|
item name collides with a keyword or contains special characters:
|
||||||
|
|
||||||
|
```
|
||||||
|
|WHERE| — category named "WHERE"
|
||||||
|
|Revenue (USD)| — name with parens
|
||||||
|
|Cost + Tax| — name with operator chars
|
||||||
|
SUM(|Net Revenue| WHERE |Region Name| = |East Coast|)
|
||||||
|
```
|
||||||
|
|
||||||
|
Pipes produce `Token::Ident` (same as bare identifiers), so they work
|
||||||
|
everywhere an identifier is expected: expressions, aggregate arguments, WHERE
|
||||||
|
clause category names and filter values. Double-quoted strings (`"..."`)
|
||||||
|
remain `Token::Str` and are used only for WHERE filter values in the
|
||||||
|
`split_where` pre-parse step.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@ -38,6 +38,12 @@ fn split_where(s: &str) -> (&str, Option<&str>) {
|
|||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
b'|' => {
|
||||||
|
i += 1;
|
||||||
|
while i < bytes.len() && bytes[i] != b'|' {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
_ if depth == 0 => {
|
_ if depth == 0 => {
|
||||||
if s[i..].to_ascii_uppercase().starts_with("WHERE") {
|
if s[i..].to_ascii_uppercase().starts_with("WHERE") {
|
||||||
let before = &s[..i];
|
let before = &s[..i];
|
||||||
@ -54,14 +60,23 @@ fn split_where(s: &str) -> (&str, Option<&str>) {
|
|||||||
(s, None)
|
(s, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Strip one pair of matching pipe (`|...|`) or double-quote (`"..."`)
/// delimiters from a value.
///
/// Surrounding whitespace is trimmed first. The delimiters are removed only
/// when the trimmed text starts and ends with the same delimiter and holds at
/// least two characters; anything else — including a lone `|` or `"` — is
/// returned trimmed but otherwise unchanged.
fn unquote(s: &str) -> String {
    let s = s.trim();
    for delim in ['"', '|'] {
        // `strip_suffix` runs on what is left after `strip_prefix`, so a
        // single delimiter character cannot count as both the opening and
        // the closing quote (slicing `s[1..s.len() - 1]` would panic there).
        let inner = s
            .strip_prefix(delim)
            .and_then(|rest| rest.strip_suffix(delim));
        if let Some(inner) = inner {
            return inner.to_string();
        }
    }
    s.to_string()
}
|
||||||
|
|
||||||
fn parse_where(s: &str) -> Result<Filter> {
|
fn parse_where(s: &str) -> Result<Filter> {
|
||||||
// Format: Category = "Item" or Category = Item
|
// Format: Category = "Item" or Category = |Item| or Category = Item
|
||||||
let eq_pos = s
|
let eq_pos = s
|
||||||
.find('=')
|
.find('=')
|
||||||
.ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?;
|
.ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?;
|
||||||
let category = s[..eq_pos].trim().to_string();
|
let category = unquote(&s[..eq_pos]);
|
||||||
let item_raw = s[eq_pos + 1..].trim();
|
let item = unquote(&s[eq_pos + 1..]);
|
||||||
let item = item_raw.trim_matches('"').to_string();
|
|
||||||
Ok(Filter { category, item })
|
Ok(Filter { category, item })
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,6 +191,18 @@ fn tokenize(s: &str) -> Result<Vec<Token>> {
|
|||||||
}
|
}
|
||||||
tokens.push(Token::Str(s));
|
tokens.push(Token::Str(s));
|
||||||
}
|
}
|
||||||
|
'|' => {
|
||||||
|
i += 1;
|
||||||
|
let mut s = String::new();
|
||||||
|
while i < chars.len() && chars[i] != '|' {
|
||||||
|
s.push(chars[i]);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
if i < chars.len() {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
tokens.push(Token::Ident(s));
|
||||||
|
}
|
||||||
c if c.is_ascii_digit() || c == '.' => {
|
c if c.is_ascii_digit() || c == '.' => {
|
||||||
let mut num = String::new();
|
let mut num = String::new();
|
||||||
while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
|
while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
|
||||||
@ -695,4 +722,78 @@ mod tests {
|
|||||||
let filter = f.filter.as_ref().unwrap();
|
let filter = f.filter.as_ref().unwrap();
|
||||||
assert_eq!(filter.item, "WHERE");
|
assert_eq!(filter.item, "WHERE");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Pipe-quoted identifiers ─────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
fn pipe_quoted_identifier_in_expression() {
    // A pipe-quoted operand resolves to a reference with the pipes removed,
    // while the parsed target is expected to keep its pipes verbatim.
    let formula = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Measure").unwrap();
    assert_eq!(formula.target, "|Total Revenue|");
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(**left, Expr::Ref(ref name) if name == "Base Revenue"));
            assert!(matches!(**right, Expr::Ref(ref name) if name == "Bonus"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn pipe_quoted_keyword_as_identifier() {
    // Keywords wrapped in pipes must come through as plain references.
    let formula = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap();
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(**left, Expr::Ref(ref name) if name == "WHERE"));
            assert!(matches!(**right, Expr::Ref(ref name) if name == "SUM"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn pipe_quoted_identifier_with_special_chars() {
    // Parentheses and operator characters inside pipes belong to the name.
    let formula = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap();
    match &formula.expr {
        Expr::BinOp(BinOp::Add, left, right) => {
            assert!(matches!(**left, Expr::Ref(ref name) if name == "Revenue (USD)"));
            assert!(matches!(**right, Expr::Ref(ref name) if name == "Cost + Tax"));
        }
        other => panic!("Expected Add, got: {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn pipe_quoted_in_aggregate() {
    // A pipe-quoted name is accepted as the argument of an aggregate call.
    let formula = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap();
    match &formula.expr {
        Expr::Agg(AggFunc::Sum, arg, None) => {
            assert!(matches!(**arg, Expr::Ref(ref name) if name == "Net Revenue"));
        }
        other => panic!("Expected SUM aggregate, got: {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn pipe_quoted_in_where_filter_value() {
    // The filter value of a top-level WHERE may be pipe-quoted; the pipes are
    // not part of the resulting item name.
    let source = "X = Revenue WHERE Region = |East Coast|";
    let formula = parse_formula(source, "Measure").unwrap();
    let where_filter = formula.filter.as_ref().unwrap();
    assert_eq!(where_filter.item, "East Coast");
}
|
||||||
|
|
||||||
|
#[test]
fn pipe_quoted_in_inline_where() {
    // Both sides of a WHERE inside an aggregate may be pipe-quoted.
    let source = "X = SUM(Revenue WHERE |Region Name| = |East Coast|)";
    let formula = parse_formula(source, "Measure").unwrap();
    match &formula.expr {
        Expr::Agg(AggFunc::Sum, _, Some(where_filter)) => {
            assert_eq!(where_filter.category, "Region Name");
            assert_eq!(where_filter.item, "East Coast");
        }
        other => panic!("Expected SUM with WHERE filter, got: {:?}", other),
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user