diff --git a/src/formula/parser.rs b/src/formula/parser.rs
index 4fd39d8..71e4b8f 100644
--- a/src/formula/parser.rs
+++ b/src/formula/parser.rs
@@ -212,6 +212,14 @@ fn tokenize(s: &str) -> Result> {
                 ) {
                     break;
                 }
+                // Break if the identifier collected so far is a keyword
+                let trimmed = ident.trim_end().to_ascii_uppercase();
+                if matches!(
+                    trimmed.as_str(),
+                    "WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF"
+                ) {
+                    break;
+                }
                 // Also break if the next word is a keyword
                 let rest: String = chars[j..].iter().collect();
                 let next_word: String = rest
@@ -500,15 +508,6 @@ mod tests {
 
     // ── Aggregate with WHERE filter ─────────────────────────────────────
 
-    /// NOTE: WHERE inside aggregate parens is broken when the inner expression
-    /// is a bare identifier. The tokenizer treats "Revenue WHERE" as a single
-    /// multi-word identifier because it greedily consumes spaces followed by
-    /// non-operator characters. The WHERE-inside-aggregate syntax only works
-    /// if the inner expression is a number, parenthesized, or otherwise
-    /// terminated before the WHERE keyword.
-    ///
-    /// Top-level WHERE (outside parens) works fine because split_where handles
-    /// it before tokenization.
     #[test]
     fn parse_sum_with_top_level_where_works() {
         let f = parse_formula(
@@ -667,6 +666,14 @@ mod tests {
         assert!(parse_expr("").is_err());
     }
 
+    #[test]
+    fn tokenizer_breaks_at_where_keyword() {
+        use super::tokenize;
+        let tokens = tokenize("Revenue WHERE Region").unwrap();
+        // Should produce 3 tokens: Ident("Revenue"), Ident("WHERE"), Ident("Region")
+        assert_eq!(tokens.len(), 3, "Expected 3 tokens, got: {tokens:?}");
+    }
+
     // ── Multi-word identifiers ──────────────────────────────────────────
 
     #[test]