From ab7e00a217a814756e4344c058803f6f89296a00 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 8 Apr 2026 23:48:30 -0700 Subject: [PATCH] feat(formula): support pipe-quoted identifiers |...| Add CL/SQL-style symbol quoting using pipe delimiters for formula identifiers. This allows category and item names that collide with keywords (WHERE, SUM, IF, etc.) or contain special characters (parens, operators, spaces) to be used unambiguously in formulas: |WHERE| + |Revenue (USD)| SUM(|Net Revenue| WHERE |Region Name| = |East Coast|) Pipes produce Token::Ident (same as bare identifiers), so they work everywhere: expressions, aggregates, WHERE clauses. Double-quoted strings remain Token::Str for backward compatibility. Also updates split_where and parse_where to skip/strip pipe delimiters. Co-Authored-By: Claude Opus 4.6 (1M context) --- context/design-principles.md | 29 +++++++--- src/formula/parser.rs | 109 +++++++++++++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 13 deletions(-) diff --git a/context/design-principles.md b/context/design-principles.md index 2bfe0b5..137b790 100644 --- a/context/design-principles.md +++ b/context/design-principles.md @@ -115,17 +115,28 @@ Formulas are parsed into a typed AST (`Expr` enum) at entry time. If the syntax is invalid, the user gets an error immediately. The evaluator only sees well-formed trees — it does not need to handle malformed input. -### Formula Tokenizer: Multi-Word Identifiers and Keywords +### Formula Tokenizer: Identifiers and Quoting -The formula tokenizer supports multi-word identifiers (e.g., `Total Revenue`) -by allowing spaces within identifier tokens when followed by non-operator -characters. However, keywords (`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`, -`IF`) act as token boundaries — the tokenizer breaks an identifier when: -1. The identifier collected **so far** is a keyword (e.g., `WHERE ` stops at `WHERE`). -2. 
The **next word** after a space is a keyword (e.g., `Revenue WHERE` stops at `Revenue`). +**Bare identifiers** support multi-word names (e.g., `Total Revenue`) by +allowing spaces when followed by non-operator, non-keyword characters. Keywords +(`WHERE`, `SUM`, `AVG`, `MIN`, `MAX`, `COUNT`, `IF`) act as token boundaries. -This ensures `SUM(Revenue WHERE Region = "East")` tokenizes correctly as -separate tokens while `Total Revenue` remains a single identifier. +**Pipe-quoted identifiers** (`|...|`) allow any characters — including spaces, +keywords, and operators — inside the delimiters. Use pipes when a category or +item name collides with a keyword or contains special characters: + +``` +|WHERE| — category named "WHERE" +|Revenue (USD)| — name with parens +|Cost + Tax| — name with operator chars +SUM(|Net Revenue| WHERE |Region Name| = |East Coast|) +``` + +Pipes produce `Token::Ident` (same as bare identifiers), so they work +everywhere an identifier is expected: expressions, aggregate arguments, WHERE +clause category names and filter values. Double-quoted strings (`"..."`) +remain `Token::Str` and are used only for WHERE filter values in the +`split_where` pre-parse step. --- diff --git a/src/formula/parser.rs b/src/formula/parser.rs index 71e4b8f..337eb73 100644 --- a/src/formula/parser.rs +++ b/src/formula/parser.rs @@ -38,6 +38,12 @@ fn split_where(s: &str) -> (&str, Option<&str>) { i += 1; } } + b'|' => { + i += 1; + while i < bytes.len() && bytes[i] != b'|' { + i += 1; + } + } _ if depth == 0 => { if s[i..].to_ascii_uppercase().starts_with("WHERE") { let before = &s[..i]; @@ -54,14 +60,23 @@ fn split_where(s: &str) -> (&str, Option<&str>) { (s, None) } +/// Strip pipe or double-quote delimiters from a value. 
/// Strip one pair of matching pipe (`|...|`) or double-quote (`"..."`) delimiters.
///
/// The input is first trimmed of surrounding whitespace. Delimiters are removed
/// only when the trimmed text starts and ends with the same delimiter character
/// and those are two distinct characters, so a lone `|` or `"` is returned
/// unchanged instead of being sliced out of range. Text between the delimiters
/// is returned verbatim, including any inner whitespace.
///
/// Returns an owned `String` in every case.
fn unquote(s: &str) -> String {
    let trimmed = s.trim();
    // Double quotes are checked first, then pipes; a value cannot match both
    // because the leading character selects the delimiter.
    for delim in ['"', '|'] {
        // `strip_suffix` runs on what remains after the prefix is removed, so a
        // one-character input can never count the same character twice.
        if let Some(inner) = trimmed
            .strip_prefix(delim)
            .and_then(|rest| rest.strip_suffix(delim))
        {
            return inner.to_string();
        }
    }
    trimmed.to_string()
}
=> { let mut num = String::new(); while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') { @@ -695,4 +722,78 @@ mod tests { let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "WHERE"); } + + // ── Pipe-quoted identifiers ───────────────────────────────────────── + + #[test] + fn pipe_quoted_identifier_in_expression() { + let f = parse_formula("|Total Revenue| = |Base Revenue| + Bonus", "Measure").unwrap(); + assert_eq!(f.target, "|Total Revenue|"); + if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(ref s) if s == "Base Revenue")); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "Bonus")); + } else { + panic!("Expected Add, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_keyword_as_identifier() { + // A category named "WHERE" can be referenced with pipes + let f = parse_formula("X = |WHERE| + |SUM|", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(ref s) if s == "WHERE")); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "SUM")); + } else { + panic!("Expected Add, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_identifier_with_special_chars() { + // Pipes allow characters that would normally break tokenization + let f = parse_formula("X = |Revenue (USD)| + |Cost + Tax|", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(ref s) if s == "Revenue (USD)")); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "Cost + Tax")); + } else { + panic!("Expected Add, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_in_aggregate() { + let f = parse_formula("X = SUM(|Net Revenue|)", "Cat").unwrap(); + if let Expr::Agg(AggFunc::Sum, inner, None) = &f.expr { + assert!(matches!(**inner, Expr::Ref(ref s) if s == "Net Revenue")); + } else { + panic!("Expected SUM aggregate, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_in_where_filter_value() { + let f = 
parse_formula( + "X = Revenue WHERE Region = |East Coast|", + "Measure", + ) + .unwrap(); + let filter = f.filter.as_ref().unwrap(); + assert_eq!(filter.item, "East Coast"); + } + + #[test] + fn pipe_quoted_in_inline_where() { + let f = parse_formula( + "X = SUM(Revenue WHERE |Region Name| = |East Coast|)", + "Measure", + ) + .unwrap(); + if let Expr::Agg(AggFunc::Sum, _, Some(filter)) = &f.expr { + assert_eq!(filter.category, "Region Name"); + assert_eq!(filter.item, "East Coast"); + } else { + panic!("Expected SUM with WHERE filter, got: {:?}", f.expr); + } + } }