diff --git a/.gitignore b/.gitignore index a4e2370..c80d176 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ bench/*.txt *.swp proptest-regressions/ .cursor +.claude/worktrees/ diff --git a/Cargo.lock b/Cargo.lock index a3d10bf..b2565b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -756,6 +756,10 @@ name = "improvise-formula" version = "0.1.0-rc2" dependencies = [ "anyhow", + "pest", + "pest_derive", + "pest_meta", + "proptest", "serde", ] diff --git a/context/design-principles.md b/context/design-principles.md index 701ace0..bd3e5df 100644 --- a/context/design-principles.md +++ b/context/design-principles.md @@ -32,6 +32,12 @@ Commands compose via `Binding::Sequence` — a keymap entry can chain multiple commands, each contributing effects independently. The `o` key (add row + begin editing) is two commands composed at the binding level, not a monolithic handler. +### Decompose rather than early return + +Early Returns usually are a signal of mixed responsibilities: if an early return +would clarify a function, consider how the function could be decomposed for the +same effect without the early return. + --- ## 2. Polymorphism Over Conditionals diff --git a/crates/improvise-formula/Cargo.toml b/crates/improvise-formula/Cargo.toml index 338e0e6..f7c7a9c 100644 --- a/crates/improvise-formula/Cargo.toml +++ b/crates/improvise-formula/Cargo.toml @@ -8,4 +8,10 @@ repository = "https://github.com/fiddlerwoaroof/improvise" [dependencies] anyhow = "1" +pest = "2.8.6" +pest_derive = "2.8.6" serde = { version = "1", features = ["derive"] } + +[dev-dependencies] +pest_meta = "2.8.6" +proptest = "1" diff --git a/crates/improvise-formula/src/formula.pest b/crates/improvise-formula/src/formula.pest new file mode 100644 index 0000000..e698d3a --- /dev/null +++ b/crates/improvise-formula/src/formula.pest @@ -0,0 +1,91 @@ +// Formula grammar for improvise. +// +// A formula has the form: TARGET = EXPR [WHERE filter] +// See parser.rs for the tree walker that produces a Formula AST. +// +// Identifier rules (bare_ident / pipe_quoted) mirror `bare_name` and +// `pipe_quoted` in src/persistence/improv.pest: bare identifiers are +// alphanumeric plus `_` and `-`, with no internal spaces; multi-word +// names must be pipe-quoted. + +// Auto-skip horizontal whitespace between tokens in non-atomic rules. +WHITESPACE = _{ " " | "\t" } + +// ---- top-level ---------------------------------------------------------- + +formula = { SOI ~ target ~ "=" ~ expr ~ where_clause? ~ EOI } + +// The target keeps its raw text (including pipes, if any) — we capture +// the span directly rather than walking into its children. +target = { identifier } + +where_clause = { ^"WHERE" ~ identifier ~ "=" ~ filter_value } + +// ---- expressions -------------------------------------------------------- + +// Used by parse_expr() — forces a standalone expression to consume the +// whole input, so `1 + 2 3` fails instead of silently dropping " 3". +expr_eoi = { SOI ~ expr ~ EOI } + +expr = { add_expr } + +add_expr = { mul_expr ~ (add_op ~ mul_expr)* } +add_op = { "+" | "-" } + +mul_expr = { pow_expr ~ (mul_op ~ pow_expr)* } +mul_op = { "*" | "/" } + +pow_expr = { unary ~ (pow_op ~ unary)? } +pow_op = { "^" } + +unary = { unary_minus | primary } +unary_minus = { "-" ~ primary } + +primary = { + number + | agg_call + | if_expr + | paren_expr + | ref_expr +} + +paren_expr = { "(" ~ expr ~ ")" } + +// Aggregates with optional inline WHERE filter inside the parens. +agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" } +agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" } +inline_where = { ^"WHERE" ~ identifier ~ "=" ~ filter_value } + +// IF(cond, then, else). Comparison is a standalone rule because comparison +// operators are not valid in general expressions — only inside an IF condition. +if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" } +comparison = { expr ~ cmp_op ~ expr } +cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" } + +// A reference to an item. `SUM` and `IF` without parens fall through to +// this rule because agg_call / if_expr require a "(" and otherwise fail. +ref_expr = { identifier } + +// ---- identifiers -------------------------------------------------------- +// +// Mirror of improv.pest's bare_name / pipe_quoted. + +identifier = ${ pipe_quoted | bare_ident } + +// Backslash escapes inside pipes: \| literal pipe, \\ backslash, \n newline. +pipe_quoted = @{ "|" ~ ("\\" ~ ANY | !"|" ~ ANY)* ~ "|" } + +bare_ident = @{ + (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-")* +} + +// ---- literal values ----------------------------------------------------- + +filter_value = { string | pipe_quoted | bare_ident } + +string = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" } + +number = @{ + ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT*)? + | "." ~ ASCII_DIGIT+ +} diff --git a/crates/improvise-formula/src/parser.rs b/crates/improvise-formula/src/parser.rs index 01df646..51cb101 100644 --- a/crates/improvise-formula/src/parser.rs +++ b/crates/improvise-formula/src/parser.rs @@ -1,460 +1,296 @@ use anyhow::{Result, anyhow}; +use pest::Parser as _; +use pest::iterators::Pair; +use pest_derive::Parser; use super::ast::{AggFunc, BinOp, Expr, Filter, Formula}; +#[derive(Parser)] +#[grammar = "formula.pest"] +struct FormulaParser; + +/// Message used by `.expect()` calls on invariants that the grammar +/// guarantees. If one of these ever panics, the grammar and the tree +/// walker are out of sync — it's a bug, not a runtime condition. +const GRAMMAR_INVARIANT: &str = "grammar invariant violated: parser out of sync with formula.pest"; + /// Parse a formula string like "Profit = Revenue - Cost" /// or "Tax = Revenue * 0.08 WHERE Region = \"East\"" pub fn parse_formula(raw: &str, target_category: &str) -> Result { - let raw = raw.trim(); - - // Split on first `=` to get target = expression - let eq_pos = raw - .find('=') - .ok_or_else(|| anyhow!("Formula must contain '=': {raw}"))?; - let target = raw[..eq_pos].trim().to_string(); - let rest = raw[eq_pos + 1..].trim(); - - // Check for WHERE clause at top level - let (expr_str, filter) = split_where(rest); - let filter = filter.map(parse_where).transpose()?; - - let expr = parse_expr(expr_str.trim())?; - - Ok(Formula::new(raw, target, target_category, expr, filter)) + let input = raw.trim(); + let formula_pair = FormulaParser::parse(Rule::formula, input) + .map_err(|e| anyhow!("{}", e))? + .next() + .expect(GRAMMAR_INVARIANT); + Ok(build_formula(formula_pair, input, target_category)) } -fn split_where(s: &str) -> (&str, Option<&str>) { - // Find WHERE not inside parens or quotes - let bytes = s.as_bytes(); - let mut depth = 0i32; - let mut i = 0; - while i < bytes.len() { - match bytes[i] { - b'(' => depth += 1, - b')' => depth -= 1, - b'"' => { - i += 1; - while i < bytes.len() && bytes[i] != b'"' { - i += 1; - } - } - b'|' => { - i += 1; - while i < bytes.len() && bytes[i] != b'|' { - i += 1; - } - } - _ if depth == 0 => { - if s[i..].to_ascii_uppercase().starts_with("WHERE") { - let before = &s[..i]; - let after = &s[i + 5..]; - if before.ends_with(char::is_whitespace) || i == 0 { - return (before.trim(), Some(after.trim())); - } - } - } - _ => {} +/// Parse a bare expression (no target, no top-level WHERE clause). +/// Fails if the input contains trailing tokens after a complete expression. +pub fn parse_expr(s: &str) -> Result { + let input = s.trim(); + let expr_eoi_pair = FormulaParser::parse(Rule::expr_eoi, input) + .map_err(|e| anyhow!("{}", e))? + .next() + .expect(GRAMMAR_INVARIANT); + Ok(build_expr(first_inner(expr_eoi_pair))) +} + +// ---- tree walkers ------------------------------------------------------- +// +// Every `build_*` function below operates on a pest Pair that has already +// been validated by the grammar. Invariants like "a `formula` has exactly +// one `target`" or "a `comparison` has an lhs, op, and rhs" are guaranteed +// by pest before the tree walker sees the Pair, so these functions are +// infallible. Any `.expect(GRAMMAR_INVARIANT)` in here represents a bug +// marker — if it ever fires, the grammar and the walker have diverged. + +fn build_formula(pair: Pair, raw: &str, target_category: &str) -> Formula { + let mut target = None; + let mut expr = None; + let mut filter = None; + for inner in pair.into_inner() { + let rule = inner.as_rule(); + if rule == Rule::target { + target = Some(inner.as_str().trim().to_string()); + } else if rule == Rule::expr { + expr = Some(build_expr(inner)); + } else if rule == Rule::where_clause { + filter = Some(build_filter(inner)); } - i += 1; + // Rule::EOI and any silent rules are ignored. } - (s, None) + Formula::new( + raw, + target.expect(GRAMMAR_INVARIANT), + target_category, + expr.expect(GRAMMAR_INVARIANT), + filter, + ) } -/// Strip pipe or double-quote delimiters from a value. -fn unquote(s: &str) -> String { - let s = s.trim(); - if (s.starts_with('"') && s.ends_with('"')) || (s.starts_with('|') && s.ends_with('|')) { - s[1..s.len() - 1].to_string() +fn build_expr(pair: Pair) -> Expr { + // expr = { add_expr } + build_add_expr(first_inner(pair)) +} + +fn build_add_expr(pair: Pair) -> Expr { + fold_left_binop(pair, build_mul_expr, |s| { + if s == "+" { + BinOp::Add + } else { + // The grammar restricts add_op to "+" | "-". + BinOp::Sub + } + }) +} + +fn build_mul_expr(pair: Pair) -> Expr { + fold_left_binop(pair, build_pow_expr, |s| { + if s == "*" { + BinOp::Mul + } else { + // The grammar restricts mul_op to "*" | "/". + BinOp::Div + } + }) +} + +fn build_pow_expr(pair: Pair) -> Expr { + // pow_expr = { unary ~ (pow_op ~ unary)? } + let mut pairs = pair.into_inner(); + let base = build_unary(pairs.next().expect(GRAMMAR_INVARIANT)); + match pairs.next() { + None => base, + Some(_pow_op) => { + let exp = build_unary(pairs.next().expect(GRAMMAR_INVARIANT)); + Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp)) + } + } +} + +fn build_unary(pair: Pair) -> Expr { + // unary = { unary_minus | primary } + let inner = first_inner(pair); + if inner.as_rule() == Rule::unary_minus { + Expr::UnaryMinus(Box::new(build_primary(first_inner(inner)))) + } else { + // primary is the only other alternative. + build_primary(inner) + } +} + +fn build_primary(pair: Pair) -> Expr { + // primary = { number | agg_call | if_expr | paren_expr | ref_expr } + let inner = first_inner(pair); + let rule = inner.as_rule(); + if rule == Rule::number { + Expr::Number(inner.as_str().parse().expect(GRAMMAR_INVARIANT)) + } else if rule == Rule::agg_call { + build_agg_call(inner) + } else if rule == Rule::if_expr { + build_if_expr(inner) + } else if rule == Rule::paren_expr { + build_expr(first_inner(inner)) + } else { + // ref_expr is the only remaining alternative. + Expr::Ref(identifier_to_string(first_inner(inner))) + } +} + +fn build_agg_call(pair: Pair) -> Expr { + // agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" } + let mut pairs = pair.into_inner(); + let func = parse_agg_func(pairs.next().expect(GRAMMAR_INVARIANT).as_str()); + let inner_expr = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + // The only pair after expr (if any) is `inline_where`, so we can + // map it directly without checking the rule variant. + let filter = pairs.next().map(build_filter); + Expr::Agg(func, Box::new(inner_expr), filter) +} + +fn parse_agg_func(s: &str) -> AggFunc { + // agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" } + match s.to_ascii_uppercase().as_str() { + "SUM" => AggFunc::Sum, + "AVG" => AggFunc::Avg, + "MIN" => AggFunc::Min, + "MAX" => AggFunc::Max, + // COUNT is the only remaining alternative. + _ => AggFunc::Count, + } +} + +fn build_if_expr(pair: Pair) -> Expr { + // if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" } + let mut pairs = pair.into_inner(); + let cond = build_comparison(pairs.next().expect(GRAMMAR_INVARIANT)); + let then_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + let else_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + Expr::If(Box::new(cond), Box::new(then_e), Box::new(else_e)) +} + +fn build_comparison(pair: Pair) -> Expr { + // comparison = { expr ~ cmp_op ~ expr } + let mut pairs = pair.into_inner(); + let lhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + let op = parse_cmp_op(pairs.next().expect(GRAMMAR_INVARIANT).as_str()); + let rhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + Expr::BinOp(op, Box::new(lhs), Box::new(rhs)) +} + +fn parse_cmp_op(s: &str) -> BinOp { + // cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" } + match s { + "=" => BinOp::Eq, + "!=" => BinOp::Ne, + "<" => BinOp::Lt, + ">" => BinOp::Gt, + "<=" => BinOp::Le, + // ">=" is the only remaining alternative. + _ => BinOp::Ge, + } +} + +fn build_filter(pair: Pair) -> Filter { + // where_clause / inline_where both have shape: + // ^"WHERE" ~ identifier ~ "=" ~ filter_value + let mut pairs = pair.into_inner(); + let category = identifier_to_string(pairs.next().expect(GRAMMAR_INVARIANT)); + let item = filter_value_to_string(pairs.next().expect(GRAMMAR_INVARIANT)); + Filter { category, item } +} + +fn filter_value_to_string(pair: Pair) -> String { + // filter_value = { string | pipe_quoted | bare_ident } + let inner = first_inner(pair); + let s = inner.as_str(); + let rule = inner.as_rule(); + if rule == Rule::string { + strip_string_quotes(s) + } else if rule == Rule::pipe_quoted { + unquote_pipe(s) + } else { + // bare_ident is the only remaining alternative. + s.to_string() + } +} + +/// Convert an identifier pair (identifier, pipe_quoted, or bare_ident) to +/// its content string. Pipe-quoted identifiers have their delimiters +/// stripped and backslash escapes applied; bare identifiers are returned +/// verbatim. +fn identifier_to_string(pair: Pair) -> String { + let s = pair.as_str(); + if is_pipe_quoted(s) { + unquote_pipe(s) } else { s.to_string() } } -fn parse_where(s: &str) -> Result { - // Format: Category = "Item" or Category = |Item| or Category = Item - let eq_pos = s - .find('=') - .ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?; - let category = unquote(&s[..eq_pos]); - let item = unquote(&s[eq_pos + 1..]); - Ok(Filter { category, item }) +fn is_pipe_quoted(s: &str) -> bool { + s.len() >= 2 && s.starts_with('|') && s.ends_with('|') } -/// Parse an expression using recursive descent -pub fn parse_expr(s: &str) -> Result { - let tokens = tokenize(s)?; - let mut pos = 0; - let expr = parse_add_sub(&tokens, &mut pos)?; - if pos < tokens.len() { - return Err(anyhow!( - "Unexpected token at position {pos}: {:?}", - tokens[pos] - )); - } - Ok(expr) +fn strip_string_quotes(s: &str) -> String { + debug_assert!(s.len() >= 2 && s.starts_with('"') && s.ends_with('"')); + s[1..s.len() - 1].to_string() } -#[derive(Debug, Clone, PartialEq)] -enum Token { - Number(f64), - Ident(String), - Str(String), - Plus, - Minus, - Star, - Slash, - Caret, - LParen, - RParen, - Comma, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, -} - -fn tokenize(s: &str) -> Result> { - let mut tokens = Vec::new(); - let chars: Vec = s.chars().collect(); - let mut i = 0; - - while i < chars.len() { - match chars[i] { - ' ' | '\t' | '\n' => i += 1, - '+' => { - tokens.push(Token::Plus); - i += 1; - } - '-' => { - tokens.push(Token::Minus); - i += 1; - } - '*' => { - tokens.push(Token::Star); - i += 1; - } - '/' => { - tokens.push(Token::Slash); - i += 1; - } - '^' => { - tokens.push(Token::Caret); - i += 1; - } - '(' => { - tokens.push(Token::LParen); - i += 1; - } - ')' => { - tokens.push(Token::RParen); - i += 1; - } - ',' => { - tokens.push(Token::Comma); - i += 1; - } - '!' if chars.get(i + 1) == Some(&'=') => { - tokens.push(Token::Ne); - i += 2; - } - '<' if chars.get(i + 1) == Some(&'=') => { - tokens.push(Token::Le); - i += 2; - } - '>' if chars.get(i + 1) == Some(&'=') => { - tokens.push(Token::Ge); - i += 2; - } - '<' => { - tokens.push(Token::Lt); - i += 1; - } - '>' => { - tokens.push(Token::Gt); - i += 1; - } - '=' => { - tokens.push(Token::Eq); - i += 1; - } - '"' => { - i += 1; - let mut s = String::new(); - while i < chars.len() && chars[i] != '"' { - s.push(chars[i]); - i += 1; +/// Strip surrounding pipes and apply backslash escapes: `\|` → `|`, +/// `\\` → `\`, `\n` → newline, and any other `\X` is preserved verbatim. +/// Matches the escape semantics documented in src/persistence/improv.pest. +fn unquote_pipe(s: &str) -> String { + debug_assert!(is_pipe_quoted(s)); + let inner = &s[1..s.len() - 1]; + let mut out = String::with_capacity(inner.len()); + let mut chars = inner.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + // The grammar rule `"\\" ~ ANY` guarantees that a backslash + // inside a pipe-quoted identifier is always followed by another + // character, so `chars.next()` cannot be `None` here. + let escaped = chars.next().expect(GRAMMAR_INVARIANT); + match escaped { + '|' => out.push('|'), + '\\' => out.push('\\'), + 'n' => out.push('\n'), + other => { + out.push('\\'); + out.push(other); } - if i < chars.len() { - i += 1; - } - tokens.push(Token::Str(s)); } - '|' => { - i += 1; - let mut s = String::new(); - while i < chars.len() && chars[i] != '|' { - s.push(chars[i]); - i += 1; - } - if i < chars.len() { - i += 1; - } - tokens.push(Token::Ident(s)); - } - c if c.is_ascii_digit() || c == '.' => { - let mut num = String::new(); - while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') { - num.push(chars[i]); - i += 1; - } - tokens.push(Token::Number(num.parse()?)); - } - c if c.is_alphabetic() || c == '_' => { - let mut ident = String::new(); - while i < chars.len() - && (chars[i].is_alphanumeric() || chars[i] == '_' || chars[i] == ' ') - { - // Don't consume trailing spaces if next non-space is operator - if chars[i] == ' ' { - // Peek ahead past spaces to find the next word/token - let j = i + 1; - let next_nonspace = chars[j..].iter().find(|&&c| c != ' '); - if matches!( - next_nonspace, - Some('+') - | Some('-') - | Some('*') - | Some('/') - | Some('^') - | Some(')') - | Some(',') - | Some('<') - | Some('>') - | Some('=') - | Some('!') - | Some('"') - | None - ) { - break; - } - // Break if the identifier collected so far is a keyword - let trimmed = ident.trim_end().to_ascii_uppercase(); - if matches!( - trimmed.as_str(), - "WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF" - ) { - break; - } - // Also break if the next word is a keyword - let rest: String = chars[j..].iter().collect(); - let next_word: String = rest - .trim_start() - .chars() - .take_while(|c| c.is_alphanumeric() || *c == '_') - .collect(); - let upper = next_word.to_ascii_uppercase(); - if matches!( - upper.as_str(), - "WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF" - ) { - break; - } - } - ident.push(chars[i]); - i += 1; - } - let ident = ident.trim_end().to_string(); - tokens.push(Token::Ident(ident)); - } - c => return Err(anyhow!("Unexpected character '{c}' in expression")), + } else { + out.push(c); } } - Ok(tokens) + out } -fn parse_add_sub(tokens: &[Token], pos: &mut usize) -> Result { - let mut left = parse_mul_div(tokens, pos)?; - while *pos < tokens.len() { - let op = match &tokens[*pos] { - Token::Plus => BinOp::Add, - Token::Minus => BinOp::Sub, - _ => break, - }; - *pos += 1; - let right = parse_mul_div(tokens, pos)?; +// ---- small helpers ------------------------------------------------------ + +fn first_inner(pair: Pair<'_, Rule>) -> Pair<'_, Rule> { + pair.into_inner().next().expect(GRAMMAR_INVARIANT) +} + +/// Fold a left-associative binary-operator rule of the shape +/// `rule = { child ~ (op ~ child)* }` into a left-leaning BinOp tree. +/// The `match_op` closure is infallible — the grammar guarantees the +/// operator token is one of the expected alternatives. +fn fold_left_binop(pair: Pair, mut build_child: F, match_op: M) -> Expr +where + F: FnMut(Pair) -> Expr, + M: Fn(&str) -> BinOp, +{ + let mut pairs = pair.into_inner(); + let mut left = build_child(pairs.next().expect(GRAMMAR_INVARIANT)); + while let Some(op_pair) = pairs.next() { + let op = match_op(op_pair.as_str()); + let right = build_child(pairs.next().expect(GRAMMAR_INVARIANT)); left = Expr::BinOp(op, Box::new(left), Box::new(right)); } - Ok(left) -} - -fn parse_mul_div(tokens: &[Token], pos: &mut usize) -> Result { - let mut left = parse_pow(tokens, pos)?; - while *pos < tokens.len() { - let op = match &tokens[*pos] { - Token::Star => BinOp::Mul, - Token::Slash => BinOp::Div, - _ => break, - }; - *pos += 1; - let right = parse_pow(tokens, pos)?; - left = Expr::BinOp(op, Box::new(left), Box::new(right)); - } - Ok(left) -} - -fn parse_pow(tokens: &[Token], pos: &mut usize) -> Result { - let base = parse_unary(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::Caret { - *pos += 1; - let exp = parse_unary(tokens, pos)?; - return Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp))); - } - Ok(base) -} - -fn parse_unary(tokens: &[Token], pos: &mut usize) -> Result { - if *pos < tokens.len() && tokens[*pos] == Token::Minus { - *pos += 1; - let e = parse_primary(tokens, pos)?; - return Ok(Expr::UnaryMinus(Box::new(e))); - } - parse_primary(tokens, pos) -} - -fn parse_primary(tokens: &[Token], pos: &mut usize) -> Result { - if *pos >= tokens.len() { - return Err(anyhow!("Unexpected end of expression")); - } - match &tokens[*pos].clone() { - Token::Number(n) => { - *pos += 1; - Ok(Expr::Number(*n)) - } - Token::Ident(name) => { - let name = name.clone(); - *pos += 1; - // Check for function call - let upper = name.to_ascii_uppercase(); - match upper.as_str() { - "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" => { - let func = match upper.as_str() { - "SUM" => AggFunc::Sum, - "AVG" => AggFunc::Avg, - "MIN" => AggFunc::Min, - "MAX" => AggFunc::Max, - "COUNT" => AggFunc::Count, - _ => unreachable!(), - }; - if *pos < tokens.len() && tokens[*pos] == Token::LParen { - *pos += 1; - let inner = parse_add_sub(tokens, pos)?; - // Optional WHERE filter - let filter = if *pos < tokens.len() { - if let Token::Ident(kw) = &tokens[*pos] { - if kw.eq_ignore_ascii_case("WHERE") { - *pos += 1; - let cat = match &tokens[*pos] { - Token::Ident(s) => { - let s = s.clone(); - *pos += 1; - s - } - t => { - return Err(anyhow!( - "Expected category name, got {t:?}" - )); - } - }; - // expect = - if *pos < tokens.len() && tokens[*pos] == Token::Eq { - *pos += 1; - } - let item = match &tokens[*pos] { - Token::Str(s) | Token::Ident(s) => { - let s = s.clone(); - *pos += 1; - s - } - t => return Err(anyhow!("Expected item name, got {t:?}")), - }; - Some(Filter { - category: cat, - item, - }) - } else { - None - } - } else { - None - } - } else { - None - }; - // expect ) - if *pos < tokens.len() && tokens[*pos] == Token::RParen { - *pos += 1; - } else { - return Err(anyhow!("Expected ')' to close aggregate function")); - } - return Ok(Expr::Agg(func, Box::new(inner), filter)); - } - Ok(Expr::Ref(name)) - } - "IF" => { - if *pos < tokens.len() && tokens[*pos] == Token::LParen { - *pos += 1; - let cond = parse_comparison(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::Comma { - *pos += 1; - } - let then = parse_add_sub(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::Comma { - *pos += 1; - } - let else_ = parse_add_sub(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::RParen { - *pos += 1; - } else { - return Err(anyhow!("Expected ')' to close IF(...)")); - } - return Ok(Expr::If(Box::new(cond), Box::new(then), Box::new(else_))); - } - Ok(Expr::Ref(name)) - } - _ => Ok(Expr::Ref(name)), - } - } - Token::LParen => { - *pos += 1; - let e = parse_add_sub(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::RParen { - *pos += 1; - } - Ok(e) - } - t => Err(anyhow!("Unexpected token in expression: {t:?}")), - } -} - -fn parse_comparison(tokens: &[Token], pos: &mut usize) -> Result { - let left = parse_add_sub(tokens, pos)?; - if *pos >= tokens.len() { - return Ok(left); - } - let op = match &tokens[*pos] { - Token::Eq => BinOp::Eq, - Token::Ne => BinOp::Ne, - Token::Lt => BinOp::Lt, - Token::Gt => BinOp::Gt, - Token::Le => BinOp::Le, - Token::Ge => BinOp::Ge, - _ => return Ok(left), - }; - *pos += 1; - let right = parse_add_sub(tokens, pos)?; - Ok(Expr::BinOp(op, Box::new(left), Box::new(right))) + left } #[cfg(test)] @@ -544,8 +380,8 @@ mod tests { assert_eq!(filter.item, "East"); } - /// Regression: WHERE inside aggregate parens must tokenize correctly. - /// The tokenizer must not merge "Revenue WHERE" into a single identifier. + /// Regression: WHERE inside aggregate parens must parse as the + /// aggregate's inline filter, not as a top-level WHERE clause. #[test] fn parse_sum_with_inline_where_filter() { let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap(); @@ -562,7 +398,6 @@ mod tests { #[test] fn parse_if_with_comparison_operators() { - // Test each comparison operator in an IF expression let f = parse_formula("X = IF(A != 0, A, 1)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); @@ -583,7 +418,6 @@ mod tests { #[test] fn parse_where_with_quoted_string_inside_expression() { - // WHERE inside a formula string with quotes let f = parse_formula("X = Revenue WHERE Region = \"West Coast\"", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "West Coast"); @@ -650,11 +484,10 @@ mod tests { } } - // ── Quoted string in tokenizer ────────────────────────────────────── + // ── Quoted string in WHERE ────────────────────────────────────────── #[test] fn parse_quoted_string_in_where() { - // Quoted strings work in top-level WHERE clauses let f = parse_formula("X = Revenue WHERE Region = \"East\"", "Cat").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "East"); @@ -681,27 +514,21 @@ mod tests { assert!(parse_expr("").is_err()); } + // ── Multi-word identifiers must be pipe-quoted ────────────────────── + #[test] - fn tokenizer_breaks_at_where_keyword() { - use super::tokenize; - let tokens = tokenize("Revenue WHERE Region").unwrap(); - // Should produce 3 tokens: Ident("Revenue"), Ident("WHERE"), Ident("Region") - assert_eq!(tokens.len(), 3, "Expected 3 tokens, got: {tokens:?}"); + fn multi_word_bare_identifier_is_rejected() { + // Multi-word identifiers must be pipe-quoted; bare multi-word fails + // the `bare_name`-compatible grammar rule. + assert!(parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").is_err()); } - // ── Multi-word identifiers ────────────────────────────────────────── + // ── WHERE inside quotes in the expression ─────────────────────────── #[test] - fn parse_multi_word_identifier() { - let f = parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").unwrap(); - assert_eq!(f.target, "Total Revenue"); - } - - // ── WHERE inside quotes in split_where ────────────────────────────── - - #[test] - fn split_where_ignores_where_inside_quotes() { - // WHERE inside quotes should not be treated as a keyword + fn where_inside_quotes_is_not_a_keyword() { + // A filter value containing the literal text "WHERE" is parsed as + // a string, not as a nested WHERE keyword. let f = parse_formula("X = Revenue WHERE Region = \"WHERE\"", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "WHERE"); @@ -773,4 +600,471 @@ mod tests { panic!("Expected SUM with WHERE filter, got: {:?}", f.expr); } } + + // ── Pipe-quoted escape semantics ──────────────────────────────────── + + #[test] + fn pipe_quoted_escape_literal_pipe() { + // \| inside a pipe-quoted identifier is a literal pipe + let f = parse_formula("X = |A\\|B|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A|B"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_escape_double_backslash() { + // \\ inside a pipe-quoted identifier is a literal backslash + let f = parse_formula("X = |A\\\\B|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A\\B"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_escape_newline() { + // \n inside a pipe-quoted identifier is a literal newline + let f = parse_formula("X = |A\\nB|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A\nB"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_unknown_escape_preserved() { + // Any `\X` where X isn't |, \, or n is preserved verbatim as + // backslash-plus-character. The grammar's `"\\" ~ ANY` allows + // any following character; we just don't interpret it. + let f = parse_formula("X = |A\\zB|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A\\zB"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + // ── Operator precedence and associativity ────────────────────────── + + #[test] + fn mul_binds_tighter_than_add() { + // `A + B * C` must parse as Add(A, Mul(B, C)) + let f = parse_formula("X = A + B * C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(_))); + assert!(matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _))); + } else { + panic!("Expected Add with Mul on rhs, got: {:?}", f.expr); + } + } + + #[test] + fn pow_binds_tighter_than_mul() { + // `A * B ^ C` must parse as Mul(A, Pow(B, C)) + let f = parse_formula("X = A * B ^ C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Mul, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(_))); + assert!(matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _))); + } else { + panic!("Expected Mul with Pow on rhs, got: {:?}", f.expr); + } + } + + #[test] + fn subtraction_is_left_associative() { + // `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C)) + let f = parse_formula("X = A - B - C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Sub, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _))); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); + } else { + panic!("Expected left-associative Sub, got: {:?}", f.expr); + } + } + + #[test] + fn division_is_left_associative() { + // `A / B / C` must parse as Div(Div(A, B), C) + let f = parse_formula("X = A / B / C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Div, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::BinOp(BinOp::Div, _, _))); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); + } else { + panic!("Expected left-associative Div, got: {:?}", f.expr); + } + } + + #[test] + fn unary_minus_before_pow() { + // `-A ^ B` — hand-rolled parser parses this as UnaryMinus(Ref(A)) + // followed by nothing because pow is inside unary's recursion. + // My grammar: unary = unary_minus | primary, unary_minus = "-" ~ primary. + // So `-A` is UnaryMinus(Ref(A)); `^B` can't attach because pow_expr + // sits OUTSIDE unary. Result: pow_expr(unary_minus(A)) — no pow + // because the optional pow_op doesn't have another unary to bind to. + // Verify: `-A ^ B` parses to BinOp(Pow, UnaryMinus(A), B). + let f = parse_formula("X = -A ^ B", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Pow, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::UnaryMinus(_))); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "B")); + } else { + panic!("Expected Pow(UnaryMinus, Ref), got: {:?}", f.expr); + } + } + + // ── Number literal variants ──────────────────────────────────────── + + #[test] + fn integer_literal() { + let f = parse_formula("X = 42", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 42.0).abs() < 1e-10)); + } + + #[test] + fn zero_literal() { + let f = parse_formula("X = 0", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if n == 0.0)); + } + + #[test] + fn decimal_literal_without_integer_part() { + let f = parse_formula("X = .5", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 0.5).abs() < 1e-10)); + } + + #[test] + fn decimal_literal_with_trailing_dot() { + let f = parse_formula("X = 5.", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 5.0).abs() < 1e-10)); + } + + #[test] + fn decimal_literal_with_integer_and_fraction() { + let f = parse_formula("X = 123.456", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 123.456).abs() < 1e-10)); + } + + // ── Filter value variants ────────────────────────────────────────── + + #[test] + fn where_with_bare_identifier_value() { + // filter_value = { string | pipe_quoted | bare_ident } — exercise + // the bare_ident branch. + let f = parse_formula("X = Revenue WHERE Region = East", "Cat").unwrap(); + let filter = f.filter.as_ref().unwrap(); + assert_eq!(filter.category, "Region"); + assert_eq!(filter.item, "East"); + } + + // ── Nested constructs ────────────────────────────────────────────── + + #[test] + fn nested_sum_aggregate() { + // Nested aggregates — outer SUM wrapping an inner SUM. + let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap(); + if let Expr::Agg(AggFunc::Sum, outer_inner, None) = &f.expr { + assert!(matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None))); + } else { + panic!("Expected nested SUM aggregates, got: {:?}", f.expr); + } + } + + #[test] + fn deeply_nested_parens() { + // Parens should flatten away without affecting the AST. + let f = parse_formula("X = (((((A + B)))))", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + #[test] + fn nested_if_expression() { + // IF in the then-branch of another IF. + let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap(); + if let Expr::If(_, then_e, else_e) = &f.expr { + assert!(matches!(**then_e, Expr::If(_, _, _))); + assert!(matches!(**else_e, Expr::Number(n) if n == 3.0)); + } else { + panic!("Expected outer IF, got: {:?}", f.expr); + } + } + + // ── Whitespace tolerance ─────────────────────────────────────────── + + #[test] + fn tolerates_tabs_between_tokens() { + let f = parse_formula("X\t=\tA\t+\tB", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + #[test] + fn tolerates_extra_spaces_between_tokens() { + let f = parse_formula("X = A + B", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + #[test] + fn tolerates_leading_and_trailing_whitespace() { + let f = parse_formula(" X = A + B ", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + // ── Case insensitivity of keywords ───────────────────────────────── + + #[test] + fn aggregate_function_is_case_insensitive() { + // The grammar uses ^"SUM" which is case-insensitive. + let f = parse_formula("X = sum(Revenue)", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); + + let f = parse_formula("X = Sum(Revenue)", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); + } + + #[test] + fn if_keyword_is_case_insensitive() { + let f = parse_formula("X = if(A > B, 1, 0)", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::If(_, _, _))); + } + + #[test] + fn where_keyword_is_case_insensitive() { + let f = parse_formula("X = Revenue where Region = \"East\"", "Cat").unwrap(); + assert!(f.filter.is_some()); + } + + // ── Target variants ──────────────────────────────────────────────── + + #[test] + fn target_with_underscore_and_hyphen() { + let f = parse_formula("my_target-name = A", "Cat").unwrap(); + assert_eq!(f.target, "my_target-name"); + } + + #[test] + fn pipe_quoted_target_preserves_pipes() { + let f = parse_formula("|My Target| = A", "Cat").unwrap(); + assert_eq!(f.target, "|My Target|"); + } +} + +// ---- grammar-driven proptests ------------------------------------------ +// +// Uses pest_meta to read formula.pest at test time and walks the grammar +// AST to generate random valid formulas. Mirrors the pattern used by the +// persistence parser tests in src/persistence/mod.rs. + +#[cfg(test)] +mod generator { + use pest_meta::ast::{Expr, RuleType}; + use pest_meta::parser; + use proptest::prelude::*; + use std::collections::HashMap; + + /// Parse formula.pest and return rules keyed by name. + fn load_grammar() -> HashMap { + let grammar = include_str!("formula.pest"); + let pairs = parser::parse(parser::Rule::grammar_rules, grammar) + .unwrap_or_else(|e| panic!("Bad grammar: {e}")); + let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}")); + rules + .into_iter() + .map(|r| (r.name.clone(), (r.ty, r.expr))) + .collect() + } + + /// Recursive string generator driven by a pest `Expr`. `choices` is + /// consumed left-to-right at every decision point; the `atomic` flag + /// controls whether sequences insert a space between their children, + /// so that non-atomic grammar rules produce whitespace-separated + /// tokens that pest's implicit WHITESPACE handling accepts. + pub struct Gen<'g> { + rules: &'g HashMap, + choices: Vec, + pos: usize, + } + + impl<'g> Gen<'g> { + pub fn new(rules: &'g HashMap, choices: Vec) -> Self { + Self { + rules, + choices, + pos: 0, + } + } + + fn pick(&mut self) -> u8 { + let v = self.choices.get(self.pos).copied().unwrap_or(0); + self.pos += 1; + v + } + + fn emit(&mut self, expr: &Expr, out: &mut String, atomic: bool) { + match expr { + Expr::Str(s) => out.push_str(s), + Expr::Insens(s) => out.push_str(s), + Expr::Range(lo, hi) => { + let lo = lo.chars().next().unwrap() as u32; + let hi = hi.chars().next().unwrap() as u32; + let range = hi - lo + 1; + let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap(); + out.push(ch); + } + Expr::Ident(name) => self.emit_ident(name, out, atomic), + Expr::Seq(a, b) => { + self.emit(a, out, atomic); + if !atomic { + out.push(' '); + } + self.emit(b, out, atomic); + } + Expr::Choice(a, b) => { + let mut alts: Vec<&Expr> = vec![a.as_ref()]; + let mut cur = b.as_ref(); + while let Expr::Choice(l, r) = cur { + alts.push(l.as_ref()); + cur = r.as_ref(); + } + alts.push(cur); + let idx = self.pick() as usize % alts.len(); + self.emit(alts[idx], out, atomic); + } + Expr::Opt(inner) => { + // ~66% chance of emitting + if !self.pick().is_multiple_of(3) { + self.emit(inner, out, atomic); + } + } + Expr::Rep(inner) => { + let count = self.pick() % 4; + for i in 0..count { + if i > 0 && !atomic { + out.push(' '); + } + self.emit(inner, out, atomic); + } + } + Expr::RepOnce(inner) => { + let count = 1 + self.pick() % 3; + for i in 0..count { + if i > 0 && !atomic { + out.push(' '); + } + self.emit(inner, out, atomic); + } + } + Expr::NegPred(_) | Expr::PosPred(_) => { + // Lookaheads don't emit output. + } + _ => { + // Skip unsupported expressions. + } + } + } + + fn emit_ident(&mut self, name: &str, out: &mut String, atomic: bool) { + match name { + "ANY" | "ASCII_ALPHA" => { + out.push((b'a' + self.pick() % 26) as char); + } + "ASCII_ALPHANUMERIC" => { + if self.pick().is_multiple_of(2) { + out.push((b'a' + self.pick() % 26) as char); + } else { + out.push((b'0' + self.pick() % 10) as char); + } + } + "ASCII_DIGIT" => { + out.push((b'0' + self.pick() % 10) as char); + } + "NEWLINE" => out.push('\n'), + "SOI" | "EOI" => {} + _ => { + if let Some((ty, inner)) = self.rules.get(name).cloned() { + let inner_atomic = atomic + || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); + self.emit(&inner, out, inner_atomic); + } + } + } + } + + pub fn generate(&mut self, rule_name: &str) -> String { + let mut out = String::new(); + if let Some((ty, expr)) = self.rules.get(rule_name).cloned() { + let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); + self.emit(&expr, &mut out, atomic); + } + out + } + } + + /// Proptest strategy: generate a random valid formula by walking the + /// grammar's `formula` rule. + pub fn formula_string() -> impl Strategy { + prop::collection::vec(any::(), 32..=128).prop_map(|choices| { + let rules = load_grammar(); + Gen::new(&rules, choices).generate("formula") + }) + } + + /// Proptest strategy: generate a random valid standalone expression. + pub fn expr_string() -> impl Strategy { + prop::collection::vec(any::(), 32..=128).prop_map(|choices| { + let rules = load_grammar(); + Gen::new(&rules, choices).generate("expr_eoi") + }) + } +} + +#[cfg(test)] +mod grammar_prop_tests { + use super::generator; + use super::{parse_expr, parse_formula}; + use proptest::prelude::*; + + proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Every generator-produced formula parses without error. + #[test] + fn generated_formula_parses(formula in generator::formula_string()) { + let result = parse_formula(&formula, "Cat"); + prop_assert!( + result.is_ok(), + "Generated formula failed to parse:\n{}\nError: {}", + formula, + result.unwrap_err() + ); + } + + /// Every generator-produced standalone expression parses. + #[test] + fn generated_expr_parses(expr in generator::expr_string()) { + let result = parse_expr(&expr); + prop_assert!( + result.is_ok(), + "Generated expression failed to parse:\n{}\nError: {}", + expr, + result.unwrap_err() + ); + } + + /// Parsing the same input twice is deterministic — the debug + /// representation of the resulting AST is identical. + #[test] + fn parse_is_deterministic(formula in generator::formula_string()) { + let r1 = parse_formula(&formula, "Cat"); + let r2 = parse_formula(&formula, "Cat"); + prop_assume!(r1.is_ok() && r2.is_ok()); + let f1 = r1.unwrap(); + let f2 = r2.unwrap(); + prop_assert_eq!(&f1.target, &f2.target); + prop_assert_eq!(format!("{:?}", f1.expr), format!("{:?}", f2.expr)); + } + } } diff --git a/src/command/cmd/commit.rs b/src/command/cmd/commit.rs index 0e17592..80a3cd8 100644 --- a/src/command/cmd/commit.rs +++ b/src/command/cmd/commit.rs @@ -137,27 +137,89 @@ mod tests { // ── Commit commands (mode-specific buffer consumers) ──────────────────────── /// Commit a cell value: for synthetic records keys, stage in drill pending edits -/// or apply directly; for real keys, write to the model. -fn commit_cell_value(key: &CellKey, value: &str, effects: &mut Vec>) { - if let Some((record_idx, col_name)) = crate::view::synthetic_record_info(key) { - effects.push(Box::new(effect::SetDrillPendingEdit { - record_idx, - col_name, - new_value: value.to_string(), - })); - } else if value.is_empty() { +/// in drill mode, or apply directly in plain records mode; for real keys, write +/// to the model. +fn commit_regular_cell_value(key: &CellKey, value: &str, effects: &mut Vec>) { + if value.is_empty() { effects.push(Box::new(effect::ClearCell(key.clone()))); - effects.push(effect::mark_dirty()); } else if let Ok(n) = value.parse::() { effects.push(Box::new(effect::SetCell(key.clone(), CellValue::Number(n)))); - effects.push(effect::mark_dirty()); } else { effects.push(Box::new(effect::SetCell( key.clone(), CellValue::Text(value.to_string()), ))); - effects.push(effect::mark_dirty()); } + effects.push(effect::mark_dirty()); +} + +/// Stage a synthetic edit in drill state so it can be applied atomically on exit. +fn stage_drill_edit(record_idx: usize, col_name: String, value: &str) -> Box { + Box::new(effect::SetDrillPendingEdit { + record_idx, + col_name, + new_value: value.to_string(), + }) +} + +/// Apply a synthetic records-mode edit directly to the underlying model cell. +fn commit_plain_records_edit( + ctx: &CmdContext, + record_idx: usize, + col_name: &str, + value: &str, + effects: &mut Vec>, +) { + let Some((orig_key, _)) = ctx + .layout + .records + .as_ref() + .and_then(|records| records.get(record_idx)) + else { + return; + }; + + if col_name == "Value" { + commit_regular_cell_value(orig_key, value, effects); + return; + } + + if value.is_empty() { + effects.push(effect::set_status(effect::RECORD_COORDS_CANNOT_BE_EMPTY)); + return; + } + + let Some(existing_value) = ctx.model.get_cell(orig_key).cloned() else { + return; + }; + effects.push(Box::new(effect::ClearCell(orig_key.clone()))); + effects.push(Box::new(effect::AddItem { + category: col_name.to_string(), + item: value.to_string(), + })); + effects.push(Box::new(effect::SetCell( + orig_key.clone().with(col_name, value), + existing_value, + ))); + effects.push(effect::mark_dirty()); +} + +fn commit_cell_value( + ctx: &CmdContext, + key: &CellKey, + value: &str, + effects: &mut Vec>, +) { + if let Some((record_idx, col_name)) = crate::view::synthetic_record_info(key) { + if ctx.has_drill_state { + effects.push(stage_drill_edit(record_idx, col_name, value)); + return; + } + commit_plain_records_edit(ctx, record_idx, &col_name, value, effects); + return; + } + + commit_regular_cell_value(key, value, effects); } /// Direction to advance after committing a cell edit. @@ -187,7 +249,7 @@ impl Cmd for CommitAndAdvance { } fn execute(&self, ctx: &CmdContext) -> Vec> { let mut effects: Vec> = Vec::new(); - commit_cell_value(&self.key, &self.value, &mut effects); + commit_cell_value(ctx, &self.key, &self.value, &mut effects); match self.advance { AdvanceDir::Down => { let adv = EnterAdvance { diff --git a/src/command/cmd/core.rs b/src/command/cmd/core.rs index 08fbad5..4134f70 100644 --- a/src/command/cmd/core.rs +++ b/src/command/cmd/core.rs @@ -43,6 +43,8 @@ pub struct CmdContext<'a> { /// View navigation stacks (for drill back/forward) pub view_back_stack: &'a [String], pub view_forward_stack: &'a [String], + /// Whether the app currently has an active drill snapshot. + pub has_drill_state: bool, /// Display value at the cursor — works uniformly for pivot and records mode. pub display_value: String, /// How many data rows/cols fit on screen (for viewport scrolling). @@ -55,9 +57,22 @@ pub struct CmdContext<'a> { } impl<'a> CmdContext<'a> { + /// Return true when the current layout is a records-mode layout. + pub fn is_records_mode(&self) -> bool { + self.layout.is_records_mode() + } + pub fn cell_key(&self) -> Option { self.layout.cell_key(self.selected.0, self.selected.1) } + + /// Return synthetic record coordinates for the current cursor, if any. + pub fn synthetic_record_at_cursor(&self) -> Option<(usize, String)> { + self.cell_key() + .as_ref() + .and_then(crate::view::synthetic_record_info) + } + pub fn row_count(&self) -> usize { self.layout.row_count() } diff --git a/src/command/cmd/grid.rs b/src/command/cmd/grid.rs index 95b945a..74976e3 100644 --- a/src/command/cmd/grid.rs +++ b/src/command/cmd/grid.rs @@ -443,12 +443,7 @@ impl Cmd for AddRecordRow { "add-record-row" } fn execute(&self, ctx: &CmdContext) -> Vec> { - let is_records = ctx - .cell_key() - .as_ref() - .and_then(crate::view::synthetic_record_info) - .is_some(); - if !is_records { + if !ctx.is_records_mode() { return vec![effect::set_status( "add-record-row only works in records mode", )]; diff --git a/src/command/cmd/mod.rs b/src/command/cmd/mod.rs index d9993f0..e7d1836 100644 --- a/src/command/cmd/mod.rs +++ b/src/command/cmd/mod.rs @@ -71,6 +71,7 @@ pub(super) mod test_helpers { buffers: &EMPTY_BUFFERS, view_back_stack: &[], view_forward_stack: &[], + has_drill_state: false, display_value: { let key = layout.cell_key(sr, sc); key.as_ref() diff --git a/src/command/cmd/mode.rs b/src/command/cmd/mode.rs index 741f7d3..6719f7b 100644 --- a/src/command/cmd/mode.rs +++ b/src/command/cmd/mode.rs @@ -228,11 +228,7 @@ impl Cmd for EditOrDrill { .unwrap_or(false) }); // In records mode (synthetic key), always edit directly — no drilling. - let is_synthetic = ctx - .cell_key() - .as_ref() - .and_then(crate::view::synthetic_record_info) - .is_some(); + let is_synthetic = ctx.synthetic_record_at_cursor().is_some(); let is_aggregated = !is_synthetic && regular_none; if is_aggregated { let Some(key) = ctx.cell_key().clone() else { diff --git a/src/model/types.rs b/src/model/types.rs index 96787e2..cfc7b69 100644 --- a/src/model/types.rs +++ b/src/model/types.rs @@ -1831,7 +1831,7 @@ mod five_category { assert_eq!(v.axis_of("Product"), Axis::Column); assert_eq!(v.axis_of("Channel"), Axis::Page); assert_eq!(v.axis_of("Time"), Axis::Page); - assert_eq!(v.axis_of("_Measure"), Axis::None); + assert_eq!(v.axis_of("_Measure"), Axis::Page); } #[test] diff --git a/src/ui/app.rs b/src/ui/app.rs index d9d62fd..35ed666 100644 --- a/src/ui/app.rs +++ b/src/ui/app.rs @@ -280,6 +280,7 @@ impl App { tile_cat_idx: self.tile_cat_idx, view_back_stack: &self.view_back_stack, view_forward_stack: &self.view_forward_stack, + has_drill_state: self.drill_state.is_some(), display_value: { let key = layout.cell_key(sel_row, sel_col); if let Some(k) = &key { @@ -708,6 +709,167 @@ mod tests { ); } + /// Regression: pressing `o` in an empty records view should create the + /// first synthetic row instead of only entering edit mode on empty space. + #[test] + fn add_record_row_in_empty_records_view_creates_first_row() { + let mut wb = Workbook::new("T"); + wb.add_category("Region").unwrap(); + wb.model.category_mut("Region").unwrap().add_item("East"); + let mut app = App::new(wb, None); + + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + assert!(app.layout.is_records_mode(), "R should enter records mode"); + assert_eq!(app.layout.row_count(), 0, "fresh records view starts empty"); + + app.handle_key(KeyEvent::new(KeyCode::Char('o'), KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.layout.row_count(), + 1, + "o should create the first record row in an empty records view" + ); + assert!( + matches!(app.mode, AppMode::Editing { .. }), + "o should leave the app in edit mode, got {:?}", + app.mode + ); + } + + /// Regression: editing the first row in a blank model's records view + /// should persist the typed value even though plain records mode does not + /// use drill state. With _Measure as the first column, `o` lands on it; + /// type a measure name, Tab to Value, type the number, Enter to commit. + #[test] + fn edit_record_row_in_blank_model_persists_value() { + use crate::model::cell::CellKey; + + let mut app = App::new(Workbook::new("T"), None); + + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + // `o` adds a record row and enters edit at (0, 0) = _Measure column + app.handle_key(KeyEvent::new(KeyCode::Char('o'), KeyModifiers::NONE)) + .unwrap(); + // Type a measure name + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('e'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('v'), KeyModifiers::NONE)) + .unwrap(); + // Tab to commit _Measure and move to Value column + app.handle_key(KeyEvent::new(KeyCode::Tab, KeyModifiers::NONE)) + .unwrap(); + // Type the value + app.handle_key(KeyEvent::new(KeyCode::Char('5'), KeyModifiers::NONE)) + .unwrap(); + // Enter to commit + app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.workbook.model.get_cell(&CellKey::new(vec![ + ("_Measure".to_string(), "Rev".to_string()), + ])), + Some(&crate::model::cell::CellValue::Number(5.0)), + "editing a synthetic row in plain records mode should write the value" + ); + } + + /// Drill-view edits should stay staged in drill state until the user + /// navigates back, at which point ApplyAndClearDrill writes them through. + #[test] + fn drill_edit_is_staged_until_view_back() { + use crate::model::cell::{CellKey, CellValue}; + + let mut wb = Workbook::new("T"); + wb.add_category("Region").unwrap(); + wb.add_category("Month").unwrap(); + wb.model.category_mut("Region").unwrap().add_item("East"); + wb.model.category_mut("Month").unwrap().add_item("Jan"); + let record_key = CellKey::new(vec![ + ("Month".to_string(), "Jan".to_string()), + ("Region".to_string(), "East".to_string()), + ]); + wb.model.set_cell(record_key.clone(), CellValue::Number(1.0)); + let mut app = App::new(wb, None); + + app.handle_key(KeyEvent::new(KeyCode::Char('>'), KeyModifiers::NONE)) + .unwrap(); + assert!(app.drill_state.is_some(), "drill should create drill state"); + let value_col = (0..app.layout.col_count()) + .find(|&col| app.layout.col_label(col) == "Value") + .expect("drill view should include a Value column"); + app.workbook.active_view_mut().selected = (0, value_col); + app.handle_key(KeyEvent::new(KeyCode::Char('i'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('9'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.workbook.model.get_cell(&record_key), + Some(&CellValue::Number(1.0)), + "drill edit should remain staged until leaving the drill view" + ); + assert_eq!( + app.drill_state + .as_ref() + .and_then(|s| s.pending_edits.get(&(0, "Value".to_string()))), + Some(&"9".to_string()), + "drill edit should be recorded in pending_edits" + ); + + app.handle_key(KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('<'), KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.workbook.model.get_cell(&record_key), + Some(&CellValue::Number(9.0)), + "leaving drill view should apply the staged edit" + ); + } + + /// Suspected bug: blanking a records-mode category coordinate should not + /// create an item with an empty name. + #[test] + fn blanking_records_category_does_not_create_empty_item() { + use crate::model::cell::{CellKey, CellValue}; + + let mut wb = Workbook::new("T"); + wb.add_category("Region").unwrap(); + wb.model.category_mut("Region").unwrap().add_item("East"); + wb.model.set_cell( + CellKey::new(vec![("Region".to_string(), "East".to_string())]), + CellValue::Number(1.0), + ); + let mut app = App::new(wb, None); + + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('i'), KeyModifiers::NONE)) + .unwrap(); + for _ in 0..4 { + app.handle_key(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)) + .unwrap(); + } + app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) + .unwrap(); + + assert!( + !app.workbook.model.category("Region").unwrap().items.contains_key(""), + "records-mode edits should not create empty category items" + ); + } + #[test] fn command_mode_buffer_cleared_on_reentry() { use crossterm::event::KeyEvent; diff --git a/src/ui/effect.rs b/src/ui/effect.rs index d9a7489..ca60fb1 100644 --- a/src/ui/effect.rs +++ b/src/ui/effect.rs @@ -6,6 +6,8 @@ use crate::view::Axis; use super::app::{App, AppMode}; +pub(crate) const RECORD_COORDS_CANNOT_BE_EMPTY: &str = "Record coordinates cannot be empty"; + /// A discrete state change produced by a command. /// Effects know how to apply themselves to the App. pub trait Effect: Debug { @@ -459,6 +461,10 @@ impl Effect for ApplyAndClearDrill { }; app.workbook.model.set_cell(orig_key.clone(), value); } else { + if new_value.is_empty() { + app.status_msg = RECORD_COORDS_CANNOT_BE_EMPTY.to_string(); + continue; + } // Rename a coordinate: remove old cell, insert new with updated coord let value = match app.workbook.model.get_cell(orig_key) { Some(v) => v.clone(), diff --git a/src/view/layout.rs b/src/view/layout.rs index 441d97a..9ed6be4 100644 --- a/src/view/layout.rs +++ b/src/view/layout.rs @@ -1,6 +1,7 @@ use std::rc::Rc; use crate::model::Model; +use crate::model::category::CategoryKind; use crate::model::cell::{CellKey, CellValue}; use crate::view::{Axis, View}; @@ -92,14 +93,13 @@ impl GridLayout { let page_coords = page_cats .iter() - .map(|cat| { + .filter_map(|cat| { let items: Vec = model.effective_item_names(cat); let sel = view .page_selection(cat) .map(String::from) - .or_else(|| items.first().cloned()) - .unwrap_or_default(); - (cat.clone(), sel) + .or_else(|| items.first().cloned())?; + Some((cat.clone(), sel)) }) .collect(); @@ -132,22 +132,12 @@ impl GridLayout { page_coords: Vec<(String, String)>, none_cats: Vec, ) -> Self { - // Filter cells by page_coords - let partial: Vec<(String, String)> = page_coords.clone(); - let mut records: Vec<(CellKey, CellValue)> = if partial.is_empty() { - model - .data - .iter_cells() - .map(|(k, v)| (k, v.clone())) - .collect() - } else { - model - .data - .matching_cells(&partial) - .into_iter() - .map(|(k, v)| (k, v.clone())) - .collect() - }; + let mut records: Vec<(CellKey, CellValue)> = model + .data + .matching_cells(&page_coords) + .into_iter() + .map(|(k, v)| (k, v.clone())) + .collect(); // Sort for deterministic ordering records.sort_by(|a, b| a.0.0.cmp(&b.0.0)); @@ -157,12 +147,22 @@ impl GridLayout { .collect(); // Synthesize col items: one per non-virtual category + "Value" - let cat_names: Vec = model + let mut cat_names: Vec = model .category_names() .into_iter() - .filter(|c| !c.starts_with('_')) + .filter(|c| { + let kind = model.category(c).map(|cat| cat.kind); + !matches!(kind, Some(CategoryKind::VirtualIndex | CategoryKind::VirtualDim)) + }) .map(String::from) .collect(); + // _Measure goes last among category columns (right before Value) + cat_names.sort_by_key(|c| { + matches!( + model.category(c).map(|cat| cat.kind), + Some(CategoryKind::VirtualMeasure) + ) + }); let mut col_items: Vec = cat_names .iter() .map(|c| AxisEntry::DataItem(vec![c.clone()])) @@ -596,6 +596,8 @@ mod tests { ]), CellValue::Number(50.0), ); + // Records mode setup: _Measure should not filter + wb.active_view_mut().set_axis("_Measure", Axis::None); wb } @@ -701,6 +703,35 @@ mod tests { assert_eq!(dim, "Region"); } + /// Regression test for improvise-rbv: records mode should include _Measure + /// as a _Dim column so the measure name is visible per-record, and "Value" + /// must remain the last column. + #[test] + fn records_mode_includes_measure_in_dim_columns() { + let mut wb = records_workbook(); + let v = wb.active_view_mut(); + v.set_axis("_Index", Axis::Row); + v.set_axis("_Dim", Axis::Column); + let layout = GridLayout::new(&wb.model, wb.active_view()); + assert!(layout.is_records_mode()); + let cols: Vec = (0..layout.col_count()) + .map(|i| layout.col_label(i)) + .collect(); + assert!( + cols.contains(&"_Measure".to_string()), + "records mode should include _Measure column; got {:?}", + cols + ); + assert!(cols.contains(&"Region".to_string())); + assert!(cols.contains(&"Value".to_string())); + assert_eq!( + cols.last().unwrap(), + "Value", + "Value must be the last column so cursor defaults land on it; got {:?}", + cols + ); + } + fn coord(pairs: &[(&str, &str)]) -> CellKey { CellKey::new( pairs diff --git a/src/workbook.rs b/src/workbook.rs index 5bb97a8..c1d1076 100644 --- a/src/workbook.rs +++ b/src/workbook.rs @@ -26,7 +26,9 @@ pub struct Workbook { impl Workbook { /// Create a new workbook with a fresh `Model` and a single `Default` view. /// Virtual categories (`_Index`, `_Dim`, `_Measure`) are registered on the - /// default view with their conventional axes (`_Index`=Row, `_Dim`=Column). + /// default view with their conventional axes (`_Index`=Row, `_Dim`=Column, + /// `_Measure`=Page so aggregated pivot views show a single measure by + /// default; see improvise-kos). pub fn new(name: impl Into) -> Self { let model = Model::new(name); let mut views = IndexMap::new(); @@ -42,6 +44,7 @@ impl Workbook { } view.set_axis("_Index", Axis::Row); view.set_axis("_Dim", Axis::Column); + view.set_axis("_Measure", Axis::Page); } wb }