From 1cea06e14be42009001b3dcc35e2e68dcca0ed76 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Tue, 14 Apr 2026 01:26:29 -0700 Subject: [PATCH 01/13] fix(records): allow adding rows in empty records view Add helper methods to CmdContext to clarify layout state and synthetic record presence. Update AddRecordRow to check for records mode generally rather than requiring a synthetic record at the current cursor, which allows adding the first row to an empty records view. Includes a regression test for the empty records view scenario. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- src/command/cmd/core.rs | 13 +++++++++++++ src/command/cmd/grid.rs | 7 +------ src/command/cmd/mode.rs | 6 +----- src/ui/app.rs | 29 +++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/command/cmd/core.rs b/src/command/cmd/core.rs index 049d7dc..fa868c0 100644 --- a/src/command/cmd/core.rs +++ b/src/command/cmd/core.rs @@ -48,9 +48,22 @@ pub struct CmdContext<'a> { } impl<'a> CmdContext<'a> { + /// Return true when the current layout is a records-mode layout. + pub fn is_records_mode(&self) -> bool { + self.layout.is_records_mode() + } + pub fn cell_key(&self) -> Option { self.layout.cell_key(self.selected.0, self.selected.1) } + + /// Return synthetic record coordinates for the current cursor, if any. + pub fn synthetic_record_at_cursor(&self) -> Option<(usize, String)> { + self.cell_key() + .as_ref() + .and_then(crate::view::synthetic_record_info) + } + pub fn row_count(&self) -> usize { self.layout.row_count() } diff --git a/src/command/cmd/grid.rs b/src/command/cmd/grid.rs index cce6573..a0176ab 100644 --- a/src/command/cmd/grid.rs +++ b/src/command/cmd/grid.rs @@ -443,12 +443,7 @@ impl Cmd for AddRecordRow { "add-record-row" } fn execute(&self, ctx: &CmdContext) -> Vec> { - let is_records = ctx - .cell_key() - .as_ref() - .and_then(crate::view::synthetic_record_info) - .is_some(); - if !is_records { + if !ctx.is_records_mode() { return vec![effect::set_status( "add-record-row only works in records mode", )]; diff --git a/src/command/cmd/mode.rs b/src/command/cmd/mode.rs index db269ad..0a705f3 100644 --- a/src/command/cmd/mode.rs +++ b/src/command/cmd/mode.rs @@ -228,11 +228,7 @@ impl Cmd for EditOrDrill { .unwrap_or(false) }); // In records mode (synthetic key), always edit directly — no drilling. - let is_synthetic = ctx - .cell_key() - .as_ref() - .and_then(crate::view::synthetic_record_info) - .is_some(); + let is_synthetic = ctx.synthetic_record_at_cursor().is_some(); let is_aggregated = !is_synthetic && regular_none; if is_aggregated { let Some(key) = ctx.cell_key().clone() else { diff --git a/src/ui/app.rs b/src/ui/app.rs index 137c800..1fec708 100644 --- a/src/ui/app.rs +++ b/src/ui/app.rs @@ -712,6 +712,35 @@ mod tests { ); } + /// Regression: pressing `o` in an empty records view should create the + /// first synthetic row instead of only entering edit mode on empty space. + #[test] + fn add_record_row_in_empty_records_view_creates_first_row() { + let mut m = Model::new("T"); + m.add_category("Region").unwrap(); + m.category_mut("Region").unwrap().add_item("East"); + let mut app = App::new(m, None); + + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + assert!(app.layout.is_records_mode(), "R should enter records mode"); + assert_eq!(app.layout.row_count(), 0, "fresh records view starts empty"); + + app.handle_key(KeyEvent::new(KeyCode::Char('o'), KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.layout.row_count(), + 1, + "o should create the first record row in an empty records view" + ); + assert!( + matches!(app.mode, AppMode::Editing { .. }), + "o should leave the app in edit mode, got {:?}", + app.mode + ); + } + #[test] fn command_mode_buffer_cleared_on_reentry() { use crossterm::event::KeyEvent; From 60a8559a7f9f0997f10c3690fbfc056daa90de7f Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Tue, 14 Apr 2026 01:26:30 -0700 Subject: [PATCH 02/13] docs(design): add principle about decomposing instead of early returning Add a new design principle encouraging decomposition of functions instead of relying on early returns, as early returns often signal mixed responsibilities. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- context/design-principles.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/context/design-principles.md b/context/design-principles.md index 701ace0..bd3e5df 100644 --- a/context/design-principles.md +++ b/context/design-principles.md @@ -32,6 +32,12 @@ Commands compose via `Binding::Sequence` — a keymap entry can chain multiple commands, each contributing effects independently. The `o` key (add row + begin editing) is two commands composed at the binding level, not a monolithic handler. +### Decompose rather than early return + +Early Returns usually are a signal of mixed responsibilities: if an early return +would clarify a function, consider how the function could be decomposed for the +same effect without the early return. + --- ## 2. Polymorphism Over Conditionals From c79498b04b5fa1f615f0af48f50ff1ba93c9eac0 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Tue, 14 Apr 2026 01:43:32 -0700 Subject: [PATCH 03/13] chore: update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a4e2370..c80d176 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ bench/*.txt *.swp proptest-regressions/ .cursor +.claude/worktrees/ From 9710fb534e6590285ed29283bf414f4290bdf84b Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Tue, 14 Apr 2026 02:32:03 -0700 Subject: [PATCH 04/13] feat(cmd): allow direct commit of synthetic records in records mode Allow synthetic record edits to be applied directly to the model when not in drill mode, whereas they remain staged in drill state when a drill snapshot is active. This enables editing of records in plain records view. Additionally, add validation to prevent creating records with empty coordinates in both direct commits and when applying staged drill edits. Includes regression tests for persistence in blank models, drill state staging, and empty coordinate prevention. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- src/command/cmd/commit.rs | 70 +++++++++++++++++++--- src/command/cmd/core.rs | 2 + src/command/cmd/mod.rs | 1 + src/ui/app.rs | 121 ++++++++++++++++++++++++++++++++++++++ src/ui/effect.rs | 4 ++ 5 files changed, 191 insertions(+), 7 deletions(-) diff --git a/src/command/cmd/commit.rs b/src/command/cmd/commit.rs index fa78d4c..b969be8 100644 --- a/src/command/cmd/commit.rs +++ b/src/command/cmd/commit.rs @@ -137,14 +137,70 @@ mod tests { // ── Commit commands (mode-specific buffer consumers) ──────────────────────── /// Commit a cell value: for synthetic records keys, stage in drill pending edits -/// or apply directly; for real keys, write to the model. -fn commit_cell_value(key: &CellKey, value: &str, effects: &mut Vec>) { +/// in drill mode, or apply directly in plain records mode; for real keys, write +/// to the model. +fn commit_cell_value( + ctx: &CmdContext, + key: &CellKey, + value: &str, + effects: &mut Vec>, +) { if let Some((record_idx, col_name)) = crate::view::synthetic_record_info(key) { - effects.push(Box::new(effect::SetDrillPendingEdit { - record_idx, - col_name, - new_value: value.to_string(), + if ctx.has_drill_state { + effects.push(Box::new(effect::SetDrillPendingEdit { + record_idx, + col_name, + new_value: value.to_string(), + })); + return; + } + + let Some((orig_key, _)) = ctx + .layout + .records + .as_ref() + .and_then(|records| records.get(record_idx)) + else { + return; + }; + + if col_name == "Value" { + if value.is_empty() { + effects.push(Box::new(effect::ClearCell(orig_key.clone()))); + } else if let Ok(n) = value.parse::() { + effects.push(Box::new(effect::SetCell( + orig_key.clone(), + CellValue::Number(n), + ))); + } else { + effects.push(Box::new(effect::SetCell( + orig_key.clone(), + CellValue::Text(value.to_string()), + ))); + } + effects.push(effect::mark_dirty()); + return; + } + + if value.is_empty() { + effects.push(effect::set_status("Record coordinates cannot be empty")); + return; + } + + let Some(existing_value) = ctx.model.get_cell(orig_key).cloned() else { + return; + }; + effects.push(Box::new(effect::ClearCell(orig_key.clone()))); + effects.push(Box::new(effect::AddItem { + category: col_name.clone(), + item: value.to_string(), })); + effects.push(Box::new(effect::SetCell( + orig_key.clone().with(col_name, value), + existing_value, + ))); + effects.push(effect::mark_dirty()); + return; } else if value.is_empty() { effects.push(Box::new(effect::ClearCell(key.clone()))); effects.push(effect::mark_dirty()); @@ -187,7 +243,7 @@ impl Cmd for CommitAndAdvance { } fn execute(&self, ctx: &CmdContext) -> Vec> { let mut effects: Vec> = Vec::new(); - commit_cell_value(&self.key, &self.value, &mut effects); + commit_cell_value(ctx, &self.key, &self.value, &mut effects); match self.advance { AdvanceDir::Down => { let adv = EnterAdvance { diff --git a/src/command/cmd/core.rs b/src/command/cmd/core.rs index fa868c0..d9d2742 100644 --- a/src/command/cmd/core.rs +++ b/src/command/cmd/core.rs @@ -36,6 +36,8 @@ pub struct CmdContext<'a> { /// View navigation stacks (for drill back/forward) pub view_back_stack: &'a [String], pub view_forward_stack: &'a [String], + /// Whether the app currently has an active drill snapshot. + pub has_drill_state: bool, /// Display value at the cursor — works uniformly for pivot and records mode. pub display_value: String, /// How many data rows/cols fit on screen (for viewport scrolling). diff --git a/src/command/cmd/mod.rs b/src/command/cmd/mod.rs index aa0daf3..79191c0 100644 --- a/src/command/cmd/mod.rs +++ b/src/command/cmd/mod.rs @@ -69,6 +69,7 @@ pub(super) mod test_helpers { buffers: &EMPTY_BUFFERS, view_back_stack: &[], view_forward_stack: &[], + has_drill_state: false, display_value: { let key = layout.cell_key(sr, sc); key.as_ref() diff --git a/src/ui/app.rs b/src/ui/app.rs index 1fec708..cd9f411 100644 --- a/src/ui/app.rs +++ b/src/ui/app.rs @@ -288,6 +288,7 @@ impl App { tile_cat_idx: self.tile_cat_idx, view_back_stack: &self.view_back_stack, view_forward_stack: &self.view_forward_stack, + has_drill_state: self.drill_state.is_some(), display_value: { let key = layout.cell_key(sel_row, sel_col); if let Some(k) = &key { @@ -741,6 +742,126 @@ mod tests { ); } + /// Regression: editing the first row in a blank model's records view + /// should persist the typed value even though plain records mode does not + /// use drill state. + #[test] + fn edit_record_row_in_blank_model_persists_value() { + use crate::model::cell::CellKey; + + let mut app = App::new(Model::new("T"), None); + + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('o'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('5'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.model.get_cell(&CellKey::new(vec![])), + Some(&crate::model::cell::CellValue::Number(5.0)), + "editing a synthetic row in plain records mode should write the value" + ); + } + + /// Drill-view edits should stay staged in drill state until the user + /// navigates back, at which point ApplyAndClearDrill writes them through. + #[test] + fn drill_edit_is_staged_until_view_back() { + use crate::model::cell::{CellKey, CellValue}; + + let mut m = Model::new("T"); + m.add_category("Region").unwrap(); + m.add_category("Month").unwrap(); + m.category_mut("Region").unwrap().add_item("East"); + m.category_mut("Month").unwrap().add_item("Jan"); + let record_key = CellKey::new(vec![ + ("Month".to_string(), "Jan".to_string()), + ("Region".to_string(), "East".to_string()), + ]); + m.set_cell(record_key.clone(), CellValue::Number(1.0)); + let mut app = App::new(m, None); + + app.handle_key(KeyEvent::new(KeyCode::Char('>'), KeyModifiers::NONE)) + .unwrap(); + assert!(app.drill_state.is_some(), "drill should create drill state"); + let value_col = (0..app.layout.col_count()) + .find(|&col| app.layout.col_label(col) == "Value") + .expect("drill view should include a Value column"); + app.model.active_view_mut().selected = (0, value_col); + app.handle_key(KeyEvent::new(KeyCode::Char('i'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('9'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.model.get_cell(&record_key), + Some(&CellValue::Number(1.0)), + "drill edit should remain staged until leaving the drill view" + ); + assert_eq!( + app.drill_state + .as_ref() + .and_then(|s| s.pending_edits.get(&(0, "Value".to_string()))), + Some(&"9".to_string()), + "drill edit should be recorded in pending_edits" + ); + + app.handle_key(KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('<'), KeyModifiers::NONE)) + .unwrap(); + + assert_eq!( + app.model.get_cell(&record_key), + Some(&CellValue::Number(9.0)), + "leaving drill view should apply the staged edit" + ); + } + + /// Suspected bug: blanking a records-mode category coordinate should not + /// create an item with an empty name. + #[test] + fn blanking_records_category_does_not_create_empty_item() { + use crate::model::cell::{CellKey, CellValue}; + + let mut m = Model::new("T"); + m.add_category("Region").unwrap(); + m.category_mut("Region").unwrap().add_item("East"); + m.set_cell( + CellKey::new(vec![("Region".to_string(), "East".to_string())]), + CellValue::Number(1.0), + ); + let mut app = App::new(m, None); + + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('i'), KeyModifiers::NONE)) + .unwrap(); + for _ in 0..4 { + app.handle_key(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)) + .unwrap(); + } + app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) + .unwrap(); + + assert!( + !app.model + .category("Region") + .unwrap() + .items + .contains_key(""), + "records-mode edits should not create empty category items" + ); + } + #[test] fn command_mode_buffer_cleared_on_reentry() { use crossterm::event::KeyEvent; diff --git a/src/ui/effect.rs b/src/ui/effect.rs index b00d853..7617632 100644 --- a/src/ui/effect.rs +++ b/src/ui/effect.rs @@ -458,6 +458,10 @@ impl Effect for ApplyAndClearDrill { }; app.model.set_cell(orig_key.clone(), value); } else { + if new_value.is_empty() { + app.status_msg = "Record coordinates cannot be empty".to_string(); + continue; + } // Rename a coordinate: remove old cell, insert new with updated coord let value = match app.model.get_cell(orig_key) { Some(v) => v.clone(), From 6f291ccd04ba94d25ee7332a0fb2a1d3207e29a1 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 03:01:30 -0700 Subject: [PATCH 05/13] refactor(ui): extract record coordinates error message to constant Extract the hardcoded error message "Record coordinates cannot be empty" into a public constant in effect.rs to avoid duplication and improve maintainability. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- src/ui/effect.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ui/effect.rs b/src/ui/effect.rs index 7617632..adfca98 100644 --- a/src/ui/effect.rs +++ b/src/ui/effect.rs @@ -6,6 +6,8 @@ use crate::view::Axis; use super::app::{App, AppMode}; +pub(crate) const RECORD_COORDS_CANNOT_BE_EMPTY: &str = "Record coordinates cannot be empty"; + /// A discrete state change produced by a command. /// Effects know how to apply themselves to the App. pub trait Effect: Debug { @@ -459,7 +461,7 @@ impl Effect for ApplyAndClearDrill { app.model.set_cell(orig_key.clone(), value); } else { if new_value.is_empty() { - app.status_msg = "Record coordinates cannot be empty".to_string(); + app.status_msg = RECORD_COORDS_CANNOT_BE_EMPTY.to_string(); continue; } // Rename a coordinate: remove old cell, insert new with updated coord From d8375ceaa7fce008ca3b5f88b0a19229d08ac02c Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 03:01:30 -0700 Subject: [PATCH 06/13] refactor(command): simplify commit_cell_value by extracting helper functions Simplify the commit_cell_value function by extracting its core logic into specialized helper functions: commit_regular_cell_value, stage_drill_edit, and commit_plain_records_edit. This improves readability and reduces nesting. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- src/command/cmd/commit.rs | 132 ++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/src/command/cmd/commit.rs b/src/command/cmd/commit.rs index b969be8..7fe2ca2 100644 --- a/src/command/cmd/commit.rs +++ b/src/command/cmd/commit.rs @@ -139,6 +139,71 @@ mod tests { /// Commit a cell value: for synthetic records keys, stage in drill pending edits /// in drill mode, or apply directly in plain records mode; for real keys, write /// to the model. +fn commit_regular_cell_value(key: &CellKey, value: &str, effects: &mut Vec>) { + if value.is_empty() { + effects.push(Box::new(effect::ClearCell(key.clone()))); + } else if let Ok(n) = value.parse::() { + effects.push(Box::new(effect::SetCell(key.clone(), CellValue::Number(n)))); + } else { + effects.push(Box::new(effect::SetCell( + key.clone(), + CellValue::Text(value.to_string()), + ))); + } + effects.push(effect::mark_dirty()); +} + +/// Stage a synthetic edit in drill state so it can be applied atomically on exit. +fn stage_drill_edit(record_idx: usize, col_name: String, value: &str) -> Box { + Box::new(effect::SetDrillPendingEdit { + record_idx, + col_name, + new_value: value.to_string(), + }) +} + +/// Apply a synthetic records-mode edit directly to the underlying model cell. +fn commit_plain_records_edit( + ctx: &CmdContext, + record_idx: usize, + col_name: &str, + value: &str, + effects: &mut Vec>, +) { + let Some((orig_key, _)) = ctx + .layout + .records + .as_ref() + .and_then(|records| records.get(record_idx)) + else { + return; + }; + + if col_name == "Value" { + commit_regular_cell_value(orig_key, value, effects); + return; + } + + if value.is_empty() { + effects.push(effect::set_status(effect::RECORD_COORDS_CANNOT_BE_EMPTY)); + return; + } + + let Some(existing_value) = ctx.model.get_cell(orig_key).cloned() else { + return; + }; + effects.push(Box::new(effect::ClearCell(orig_key.clone()))); + effects.push(Box::new(effect::AddItem { + category: col_name.to_string(), + item: value.to_string(), + })); + effects.push(Box::new(effect::SetCell( + orig_key.clone().with(col_name, value), + existing_value, + ))); + effects.push(effect::mark_dirty()); +} + fn commit_cell_value( ctx: &CmdContext, key: &CellKey, @@ -147,73 +212,14 @@ fn commit_cell_value( ) { if let Some((record_idx, col_name)) = crate::view::synthetic_record_info(key) { if ctx.has_drill_state { - effects.push(Box::new(effect::SetDrillPendingEdit { - record_idx, - col_name, - new_value: value.to_string(), - })); + effects.push(stage_drill_edit(record_idx, col_name, value)); return; } - - let Some((orig_key, _)) = ctx - .layout - .records - .as_ref() - .and_then(|records| records.get(record_idx)) - else { - return; - }; - - if col_name == "Value" { - if value.is_empty() { - effects.push(Box::new(effect::ClearCell(orig_key.clone()))); - } else if let Ok(n) = value.parse::() { - effects.push(Box::new(effect::SetCell( - orig_key.clone(), - CellValue::Number(n), - ))); - } else { - effects.push(Box::new(effect::SetCell( - orig_key.clone(), - CellValue::Text(value.to_string()), - ))); - } - effects.push(effect::mark_dirty()); - return; - } - - if value.is_empty() { - effects.push(effect::set_status("Record coordinates cannot be empty")); - return; - } - - let Some(existing_value) = ctx.model.get_cell(orig_key).cloned() else { - return; - }; - effects.push(Box::new(effect::ClearCell(orig_key.clone()))); - effects.push(Box::new(effect::AddItem { - category: col_name.clone(), - item: value.to_string(), - })); - effects.push(Box::new(effect::SetCell( - orig_key.clone().with(col_name, value), - existing_value, - ))); - effects.push(effect::mark_dirty()); + commit_plain_records_edit(ctx, record_idx, &col_name, value, effects); return; - } else if value.is_empty() { - effects.push(Box::new(effect::ClearCell(key.clone()))); - effects.push(effect::mark_dirty()); - } else if let Ok(n) = value.parse::() { - effects.push(Box::new(effect::SetCell(key.clone(), CellValue::Number(n)))); - effects.push(effect::mark_dirty()); - } else { - effects.push(Box::new(effect::SetCell( - key.clone(), - CellValue::Text(value.to_string()), - ))); - effects.push(effect::mark_dirty()); } + + commit_regular_cell_value(key, value, effects); } /// Direction to advance after committing a cell edit. From efab0cc32e2a942ee9912a25f8e5dcae6202e8bd Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 03:01:30 -0700 Subject: [PATCH 07/13] test(ui): clean up formatting in app.rs tests Clean up formatting of a test assertion in app.rs to improve readability. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- src/ui/app.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/ui/app.rs b/src/ui/app.rs index cd9f411..5228bee 100644 --- a/src/ui/app.rs +++ b/src/ui/app.rs @@ -853,11 +853,7 @@ mod tests { .unwrap(); assert!( - !app.model - .category("Region") - .unwrap() - .items - .contains_key(""), + !app.model.category("Region").unwrap().items.contains_key(""), "records-mode edits should not create empty category items" ); } From 38f83b24174cc4302a11ad0249e58ff1a08e9a8f Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 03:56:18 -0700 Subject: [PATCH 08/13] fix(records): include _Measure as visible column in records mode (improvise-rbv) The records mode column filter excluded all categories starting with '_', which hid _Measure. Changed to explicitly exclude only _Index and _Dim, making _Measure visible as a data column. Updated the blank-model editing test to reflect the new column order (_Measure first, Value last). Made-with: Cursor --- src/ui/app.rs | 20 ++++++++++++++++++-- src/view/layout.rs | 31 ++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/ui/app.rs b/src/ui/app.rs index 5228bee..1d20ca7 100644 --- a/src/ui/app.rs +++ b/src/ui/app.rs @@ -744,7 +744,8 @@ mod tests { /// Regression: editing the first row in a blank model's records view /// should persist the typed value even though plain records mode does not - /// use drill state. + /// use drill state. With _Measure as the first column, `o` lands on it; + /// type a measure name, Tab to Value, type the number, Enter to commit. #[test] fn edit_record_row_in_blank_model_persists_value() { use crate::model::cell::CellKey; @@ -753,15 +754,30 @@ mod tests { app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) .unwrap(); + // `o` adds a record row and enters edit at (0, 0) = _Measure column app.handle_key(KeyEvent::new(KeyCode::Char('o'), KeyModifiers::NONE)) .unwrap(); + // Type a measure name + app.handle_key(KeyEvent::new(KeyCode::Char('R'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('e'), KeyModifiers::NONE)) + .unwrap(); + app.handle_key(KeyEvent::new(KeyCode::Char('v'), KeyModifiers::NONE)) + .unwrap(); + // Tab to commit _Measure and move to Value column + app.handle_key(KeyEvent::new(KeyCode::Tab, KeyModifiers::NONE)) + .unwrap(); + // Type the value app.handle_key(KeyEvent::new(KeyCode::Char('5'), KeyModifiers::NONE)) .unwrap(); + // Enter to commit app.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)) .unwrap(); assert_eq!( - app.model.get_cell(&CellKey::new(vec![])), + app.model.get_cell(&CellKey::new(vec![ + ("_Measure".to_string(), "Rev".to_string()), + ])), Some(&crate::model::cell::CellValue::Number(5.0)), "editing a synthetic row in plain records mode should write the value" ); diff --git a/src/view/layout.rs b/src/view/layout.rs index f502843..69403cc 100644 --- a/src/view/layout.rs +++ b/src/view/layout.rs @@ -160,7 +160,7 @@ impl GridLayout { let cat_names: Vec = model .category_names() .into_iter() - .filter(|c| !c.starts_with('_')) + .filter(|c| *c != "_Index" && *c != "_Dim") .map(String::from) .collect(); let mut col_items: Vec = cat_names @@ -698,6 +698,35 @@ mod tests { assert_eq!(dim, "Region"); } + /// Regression test for improvise-rbv: records mode should include _Measure + /// as a _Dim column so the measure name is visible per-record, and "Value" + /// must remain the last column. + #[test] + fn records_mode_includes_measure_in_dim_columns() { + let mut m = records_model(); + let v = m.active_view_mut(); + v.set_axis("_Index", Axis::Row); + v.set_axis("_Dim", Axis::Column); + let layout = GridLayout::new(&m, m.active_view()); + assert!(layout.is_records_mode()); + let cols: Vec = (0..layout.col_count()) + .map(|i| layout.col_label(i)) + .collect(); + assert!( + cols.contains(&"_Measure".to_string()), + "records mode should include _Measure column; got {:?}", + cols + ); + assert!(cols.contains(&"Region".to_string())); + assert!(cols.contains(&"Value".to_string())); + assert_eq!( + cols.last().unwrap(), + "Value", + "Value must be the last column so cursor defaults land on it; got {:?}", + cols + ); + } + fn coord(pairs: &[(&str, &str)]) -> CellKey { CellKey::new( pairs From 3f69f887099e6e2c186971051d1e8e405273ddfc Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 04:04:57 -0700 Subject: [PATCH 09/13] refactor!(formula): migrate parser to use pest Replace the manual tokenizer and recursive descent parser with a PEG grammar using the pest library. This migration involves introducing a formal grammar in formula.pest and updating the parser implementation to utilize the generated Pest parser with a tree-walking approach to construct the AST. The change introduces a stricter requirement for identifiers: multi-word identifiers must now be enclosed in pipe quotes (e.g., |Total Revenue|) and are no longer accepted as bare words. Tests have been updated to reflect the new parsing logic, remove tokenizer-specific tests, and verify the new pipe-quoting and escape semantics. BREAKING CHANGE: Multi-word identifiers now require pipe-quoting (e.g. |Total Revenue|) and are no longer accepted as bare words. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- Cargo.lock | 2 + crates/improvise-formula/Cargo.toml | 2 + crates/improvise-formula/src/formula.pest | 91 +++ crates/improvise-formula/src/parser.rs | 755 +++++++++------------- 4 files changed, 404 insertions(+), 446 deletions(-) create mode 100644 crates/improvise-formula/src/formula.pest diff --git a/Cargo.lock b/Cargo.lock index a3d10bf..ad2f76e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -756,6 +756,8 @@ name = "improvise-formula" version = "0.1.0-rc2" dependencies = [ "anyhow", + "pest", + "pest_derive", "serde", ] diff --git a/crates/improvise-formula/Cargo.toml b/crates/improvise-formula/Cargo.toml index 338e0e6..7782815 100644 --- a/crates/improvise-formula/Cargo.toml +++ b/crates/improvise-formula/Cargo.toml @@ -8,4 +8,6 @@ repository = "https://github.com/fiddlerwoaroof/improvise" [dependencies] anyhow = "1" +pest = "2.8.6" +pest_derive = "2.8.6" serde = { version = "1", features = ["derive"] } diff --git a/crates/improvise-formula/src/formula.pest b/crates/improvise-formula/src/formula.pest new file mode 100644 index 0000000..e698d3a --- /dev/null +++ b/crates/improvise-formula/src/formula.pest @@ -0,0 +1,91 @@ +// Formula grammar for improvise. +// +// A formula has the form: TARGET = EXPR [WHERE filter] +// See parser.rs for the tree walker that produces a Formula AST. +// +// Identifier rules (bare_ident / pipe_quoted) mirror `bare_name` and +// `pipe_quoted` in src/persistence/improv.pest: bare identifiers are +// alphanumeric plus `_` and `-`, with no internal spaces; multi-word +// names must be pipe-quoted. + +// Auto-skip horizontal whitespace between tokens in non-atomic rules. +WHITESPACE = _{ " " | "\t" } + +// ---- top-level ---------------------------------------------------------- + +formula = { SOI ~ target ~ "=" ~ expr ~ where_clause? ~ EOI } + +// The target keeps its raw text (including pipes, if any) — we capture +// the span directly rather than walking into its children. +target = { identifier } + +where_clause = { ^"WHERE" ~ identifier ~ "=" ~ filter_value } + +// ---- expressions -------------------------------------------------------- + +// Used by parse_expr() — forces a standalone expression to consume the +// whole input, so `1 + 2 3` fails instead of silently dropping " 3". +expr_eoi = { SOI ~ expr ~ EOI } + +expr = { add_expr } + +add_expr = { mul_expr ~ (add_op ~ mul_expr)* } +add_op = { "+" | "-" } + +mul_expr = { pow_expr ~ (mul_op ~ pow_expr)* } +mul_op = { "*" | "/" } + +pow_expr = { unary ~ (pow_op ~ unary)? } +pow_op = { "^" } + +unary = { unary_minus | primary } +unary_minus = { "-" ~ primary } + +primary = { + number + | agg_call + | if_expr + | paren_expr + | ref_expr +} + +paren_expr = { "(" ~ expr ~ ")" } + +// Aggregates with optional inline WHERE filter inside the parens. +agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" } +agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" } +inline_where = { ^"WHERE" ~ identifier ~ "=" ~ filter_value } + +// IF(cond, then, else). Comparison is a standalone rule because comparison +// operators are not valid in general expressions — only inside an IF condition. +if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" } +comparison = { expr ~ cmp_op ~ expr } +cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" } + +// A reference to an item. `SUM` and `IF` without parens fall through to +// this rule because agg_call / if_expr require a "(" and otherwise fail. +ref_expr = { identifier } + +// ---- identifiers -------------------------------------------------------- +// +// Mirror of improv.pest's bare_name / pipe_quoted. + +identifier = ${ pipe_quoted | bare_ident } + +// Backslash escapes inside pipes: \| literal pipe, \\ backslash, \n newline. +pipe_quoted = @{ "|" ~ ("\\" ~ ANY | !"|" ~ ANY)* ~ "|" } + +bare_ident = @{ + (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-")* +} + +// ---- literal values ----------------------------------------------------- + +filter_value = { string | pipe_quoted | bare_ident } + +string = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" } + +number = @{ + ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT*)? + | "." ~ ASCII_DIGIT+ +} diff --git a/crates/improvise-formula/src/parser.rs b/crates/improvise-formula/src/parser.rs index 01df646..0e7238c 100644 --- a/crates/improvise-formula/src/parser.rs +++ b/crates/improvise-formula/src/parser.rs @@ -1,462 +1,321 @@ use anyhow::{Result, anyhow}; +use pest::Parser as _; +use pest::iterators::Pair; +use pest_derive::Parser; use super::ast::{AggFunc, BinOp, Expr, Filter, Formula}; +#[derive(Parser)] +#[grammar = "formula.pest"] +struct FormulaParser; + /// Parse a formula string like "Profit = Revenue - Cost" /// or "Tax = Revenue * 0.08 WHERE Region = \"East\"" pub fn parse_formula(raw: &str, target_category: &str) -> Result { - let raw = raw.trim(); + let input = raw.trim(); + let formula_pair = FormulaParser::parse(Rule::formula, input) + .map_err(|e| anyhow!("{}", e))? + .next() + .ok_or_else(|| anyhow!("empty parse result"))?; + build_formula(formula_pair, input, target_category) +} - // Split on first `=` to get target = expression - let eq_pos = raw - .find('=') - .ok_or_else(|| anyhow!("Formula must contain '=': {raw}"))?; - let target = raw[..eq_pos].trim().to_string(); - let rest = raw[eq_pos + 1..].trim(); +/// Parse a bare expression (no target, no top-level WHERE clause). +/// Fails if the input contains trailing tokens after a complete expression. +pub fn parse_expr(s: &str) -> Result { + let input = s.trim(); + let expr_pair = FormulaParser::parse(Rule::expr_eoi, input) + .map_err(|e| anyhow!("{}", e))? + .next() + .ok_or_else(|| anyhow!("empty parse result"))? + .into_inner() + .next() + .ok_or_else(|| anyhow!("missing expression in expr_eoi"))?; + build_expr(expr_pair) +} - // Check for WHERE clause at top level - let (expr_str, filter) = split_where(rest); - let filter = filter.map(parse_where).transpose()?; - - let expr = parse_expr(expr_str.trim())?; +// ---- tree walkers ------------------------------------------------------- +fn build_formula(pair: Pair, raw: &str, target_category: &str) -> Result { + let mut target = None; + let mut expr = None; + let mut filter = None; + for inner in pair.into_inner() { + match inner.as_rule() { + Rule::target => target = Some(inner.as_str().trim().to_string()), + Rule::expr => expr = Some(build_expr(inner)?), + Rule::where_clause => filter = Some(build_filter(inner)?), + Rule::EOI => {} + r => return Err(anyhow!("unexpected rule in formula: {:?}", r)), + } + } + let target = target.ok_or_else(|| anyhow!("missing target in formula"))?; + let expr = expr.ok_or_else(|| anyhow!("missing expression in formula"))?; Ok(Formula::new(raw, target, target_category, expr, filter)) } -fn split_where(s: &str) -> (&str, Option<&str>) { - // Find WHERE not inside parens or quotes - let bytes = s.as_bytes(); - let mut depth = 0i32; - let mut i = 0; - while i < bytes.len() { - match bytes[i] { - b'(' => depth += 1, - b')' => depth -= 1, - b'"' => { - i += 1; - while i < bytes.len() && bytes[i] != b'"' { - i += 1; - } - } - b'|' => { - i += 1; - while i < bytes.len() && bytes[i] != b'|' { - i += 1; - } - } - _ if depth == 0 => { - if s[i..].to_ascii_uppercase().starts_with("WHERE") { - let before = &s[..i]; - let after = &s[i + 5..]; - if before.ends_with(char::is_whitespace) || i == 0 { - return (before.trim(), Some(after.trim())); - } - } - } - _ => {} - } - i += 1; - } - (s, None) +fn build_expr(pair: Pair) -> Result { + // expr = { add_expr } + build_add_expr(first_inner(pair, "expr")?) } -/// Strip pipe or double-quote delimiters from a value. -fn unquote(s: &str) -> String { - let s = s.trim(); - if (s.starts_with('"') && s.ends_with('"')) || (s.starts_with('|') && s.ends_with('|')) { - s[1..s.len() - 1].to_string() +fn build_add_expr(pair: Pair) -> Result { + fold_left_binop(pair, build_mul_expr, |s| match s { + "+" => Some(BinOp::Add), + "-" => Some(BinOp::Sub), + _ => None, + }) +} + +fn build_mul_expr(pair: Pair) -> Result { + fold_left_binop(pair, build_pow_expr, |s| match s { + "*" => Some(BinOp::Mul), + "/" => Some(BinOp::Div), + _ => None, + }) +} + +fn build_pow_expr(pair: Pair) -> Result { + // pow_expr = { unary ~ (pow_op ~ unary)? } + let mut pairs = pair.into_inner(); + let base_pair = pairs + .next() + .ok_or_else(|| anyhow!("empty pow_expr"))?; + let base = build_unary(base_pair)?; + match pairs.next() { + None => Ok(base), + Some(op_pair) => { + debug_assert_eq!(op_pair.as_rule(), Rule::pow_op); + let exp_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing exponent in pow_expr"))?; + let exp = build_unary(exp_pair)?; + Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp))) + } + } +} + +fn build_unary(pair: Pair) -> Result { + // unary = { unary_minus | primary } + let inner = first_inner(pair, "unary")?; + match inner.as_rule() { + Rule::unary_minus => { + let prim = first_inner(inner, "unary_minus")?; + Ok(Expr::UnaryMinus(Box::new(build_primary(prim)?))) + } + Rule::primary => build_primary(inner), + r => Err(anyhow!("unexpected rule in unary: {:?}", r)), + } +} + +fn build_primary(pair: Pair) -> Result { + // primary = { number | agg_call | if_expr | paren_expr | ref_expr } + let inner = first_inner(pair, "primary")?; + match inner.as_rule() { + Rule::number => Ok(Expr::Number(inner.as_str().parse()?)), + Rule::agg_call => build_agg_call(inner), + Rule::if_expr => build_if_expr(inner), + Rule::paren_expr => build_expr(first_inner(inner, "paren_expr")?), + Rule::ref_expr => { + let id_pair = first_inner(inner, "ref_expr")?; + Ok(Expr::Ref(identifier_to_string(id_pair))) + } + r => Err(anyhow!("unexpected rule in primary: {:?}", r)), + } +} + +fn build_agg_call(pair: Pair) -> Result { + // agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" } + let mut pairs = pair.into_inner(); + let func_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing agg_func"))?; + let func = parse_agg_func(func_pair.as_str())?; + let expr_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing aggregate argument"))?; + let inner_expr = build_expr(expr_pair)?; + let filter = match pairs.next() { + Some(p) if p.as_rule() == Rule::inline_where => Some(build_filter(p)?), + _ => None, + }; + Ok(Expr::Agg(func, Box::new(inner_expr), filter)) +} + +fn parse_agg_func(s: &str) -> Result { + match s.to_ascii_uppercase().as_str() { + "SUM" => Ok(AggFunc::Sum), + "AVG" => Ok(AggFunc::Avg), + "MIN" => Ok(AggFunc::Min), + "MAX" => Ok(AggFunc::Max), + "COUNT" => Ok(AggFunc::Count), + f => Err(anyhow!("unknown aggregate function: {}", f)), + } +} + +fn build_if_expr(pair: Pair) -> Result { + // if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" } + let mut pairs = pair.into_inner(); + let cond_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing IF condition"))?; + let cond = build_comparison(cond_pair)?; + let then_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing IF then-branch"))?; + let then_e = build_expr(then_pair)?; + let else_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing IF else-branch"))?; + let else_e = build_expr(else_pair)?; + Ok(Expr::If( + Box::new(cond), + Box::new(then_e), + Box::new(else_e), + )) +} + +fn build_comparison(pair: Pair) -> Result { + // comparison = { expr ~ cmp_op ~ expr } + let mut pairs = pair.into_inner(); + let lhs_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing comparison lhs"))?; + let lhs = build_expr(lhs_pair)?; + let op_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing comparison operator"))?; + let op = parse_cmp_op(op_pair.as_str())?; + let rhs_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing comparison rhs"))?; + let rhs = build_expr(rhs_pair)?; + Ok(Expr::BinOp(op, Box::new(lhs), Box::new(rhs))) +} + +fn parse_cmp_op(s: &str) -> Result { + match s { + "=" => Ok(BinOp::Eq), + "!=" => Ok(BinOp::Ne), + "<" => Ok(BinOp::Lt), + ">" => Ok(BinOp::Gt), + "<=" => Ok(BinOp::Le), + ">=" => Ok(BinOp::Ge), + o => Err(anyhow!("unknown comparison operator: {}", o)), + } +} + +fn build_filter(pair: Pair) -> Result { + // where_clause / inline_where both have shape: + // ^"WHERE" ~ identifier ~ "=" ~ filter_value + let mut pairs = pair.into_inner(); + let cat_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing WHERE category"))?; + let category = identifier_to_string(cat_pair); + let val_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing WHERE value"))?; + let item = filter_value_to_string(val_pair); + Ok(Filter { category, item }) +} + +fn filter_value_to_string(pair: Pair) -> String { + // filter_value = { string | pipe_quoted | bare_ident } + let inner = pair + .into_inner() + .next() + .expect("filter_value must have an inner pair"); + let s = inner.as_str(); + match inner.as_rule() { + Rule::string => strip_string_quotes(s), + Rule::pipe_quoted => unquote_pipe(s), + _ => s.to_string(), + } +} + +/// Convert an identifier pair (identifier, pipe_quoted, or bare_ident) to +/// its content string. Pipe-quoted identifiers have their delimiters +/// stripped and backslash escapes applied; bare identifiers are returned +/// verbatim. +fn identifier_to_string(pair: Pair) -> String { + let s = pair.as_str(); + if is_pipe_quoted(s) { + unquote_pipe(s) } else { s.to_string() } } -fn parse_where(s: &str) -> Result { - // Format: Category = "Item" or Category = |Item| or Category = Item - let eq_pos = s - .find('=') - .ok_or_else(|| anyhow!("WHERE clause must contain '=': {s}"))?; - let category = unquote(&s[..eq_pos]); - let item = unquote(&s[eq_pos + 1..]); - Ok(Filter { category, item }) +fn is_pipe_quoted(s: &str) -> bool { + s.len() >= 2 && s.starts_with('|') && s.ends_with('|') } -/// Parse an expression using recursive descent -pub fn parse_expr(s: &str) -> Result { - let tokens = tokenize(s)?; - let mut pos = 0; - let expr = parse_add_sub(&tokens, &mut pos)?; - if pos < tokens.len() { - return Err(anyhow!( - "Unexpected token at position {pos}: {:?}", - tokens[pos] - )); - } - Ok(expr) +fn strip_string_quotes(s: &str) -> String { + debug_assert!(s.len() >= 2 && s.starts_with('"') && s.ends_with('"')); + s[1..s.len() - 1].to_string() } -#[derive(Debug, Clone, PartialEq)] -enum Token { - Number(f64), - Ident(String), - Str(String), - Plus, - Minus, - Star, - Slash, - Caret, - LParen, - RParen, - Comma, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, -} - -fn tokenize(s: &str) -> Result> { - let mut tokens = Vec::new(); - let chars: Vec = s.chars().collect(); - let mut i = 0; - - while i < chars.len() { - match chars[i] { - ' ' | '\t' | '\n' => i += 1, - '+' => { - tokens.push(Token::Plus); - i += 1; - } - '-' => { - tokens.push(Token::Minus); - i += 1; - } - '*' => { - tokens.push(Token::Star); - i += 1; - } - '/' => { - tokens.push(Token::Slash); - i += 1; - } - '^' => { - tokens.push(Token::Caret); - i += 1; - } - '(' => { - tokens.push(Token::LParen); - i += 1; - } - ')' => { - tokens.push(Token::RParen); - i += 1; - } - ',' => { - tokens.push(Token::Comma); - i += 1; - } - '!' if chars.get(i + 1) == Some(&'=') => { - tokens.push(Token::Ne); - i += 2; - } - '<' if chars.get(i + 1) == Some(&'=') => { - tokens.push(Token::Le); - i += 2; - } - '>' if chars.get(i + 1) == Some(&'=') => { - tokens.push(Token::Ge); - i += 2; - } - '<' => { - tokens.push(Token::Lt); - i += 1; - } - '>' => { - tokens.push(Token::Gt); - i += 1; - } - '=' => { - tokens.push(Token::Eq); - i += 1; - } - '"' => { - i += 1; - let mut s = String::new(); - while i < chars.len() && chars[i] != '"' { - s.push(chars[i]); - i += 1; +/// Strip surrounding pipes and apply backslash escapes: `\|` → `|`, +/// `\\` → `\`, `\n` → newline. Matches the escape semantics documented +/// in src/persistence/improv.pest. +fn unquote_pipe(s: &str) -> String { + debug_assert!(is_pipe_quoted(s)); + let inner = &s[1..s.len() - 1]; + let mut out = String::with_capacity(inner.len()); + let mut chars = inner.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('|') => out.push('|'), + Some('\\') => out.push('\\'), + Some('n') => out.push('\n'), + Some(other) => { + out.push('\\'); + out.push(other); } - if i < chars.len() { - i += 1; - } - tokens.push(Token::Str(s)); + None => out.push('\\'), } - '|' => { - i += 1; - let mut s = String::new(); - while i < chars.len() && chars[i] != '|' { - s.push(chars[i]); - i += 1; - } - if i < chars.len() { - i += 1; - } - tokens.push(Token::Ident(s)); - } - c if c.is_ascii_digit() || c == '.' => { - let mut num = String::new(); - while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') { - num.push(chars[i]); - i += 1; - } - tokens.push(Token::Number(num.parse()?)); - } - c if c.is_alphabetic() || c == '_' => { - let mut ident = String::new(); - while i < chars.len() - && (chars[i].is_alphanumeric() || chars[i] == '_' || chars[i] == ' ') - { - // Don't consume trailing spaces if next non-space is operator - if chars[i] == ' ' { - // Peek ahead past spaces to find the next word/token - let j = i + 1; - let next_nonspace = chars[j..].iter().find(|&&c| c != ' '); - if matches!( - next_nonspace, - Some('+') - | Some('-') - | Some('*') - | Some('/') - | Some('^') - | Some(')') - | Some(',') - | Some('<') - | Some('>') - | Some('=') - | Some('!') - | Some('"') - | None - ) { - break; - } - // Break if the identifier collected so far is a keyword - let trimmed = ident.trim_end().to_ascii_uppercase(); - if matches!( - trimmed.as_str(), - "WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF" - ) { - break; - } - // Also break if the next word is a keyword - let rest: String = chars[j..].iter().collect(); - let next_word: String = rest - .trim_start() - .chars() - .take_while(|c| c.is_alphanumeric() || *c == '_') - .collect(); - let upper = next_word.to_ascii_uppercase(); - if matches!( - upper.as_str(), - "WHERE" | "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" | "IF" - ) { - break; - } - } - ident.push(chars[i]); - i += 1; - } - let ident = ident.trim_end().to_string(); - tokens.push(Token::Ident(ident)); - } - c => return Err(anyhow!("Unexpected character '{c}' in expression")), + } else { + out.push(c); } } - Ok(tokens) + out } -fn parse_add_sub(tokens: &[Token], pos: &mut usize) -> Result { - let mut left = parse_mul_div(tokens, pos)?; - while *pos < tokens.len() { - let op = match &tokens[*pos] { - Token::Plus => BinOp::Add, - Token::Minus => BinOp::Sub, - _ => break, - }; - *pos += 1; - let right = parse_mul_div(tokens, pos)?; +// ---- small helpers ------------------------------------------------------ + +fn first_inner<'a>(pair: Pair<'a, Rule>, ctx: &str) -> Result> { + pair.into_inner() + .next() + .ok_or_else(|| anyhow!("empty rule: {}", ctx)) +} + +/// Fold a left-associative binary-operator rule of the shape +/// `rule = { child ~ (op ~ child)* }` into a left-leaning BinOp tree. +fn fold_left_binop(pair: Pair, mut build_child: F, match_op: M) -> Result +where + F: FnMut(Pair) -> Result, + M: Fn(&str) -> Option, +{ + let mut pairs = pair.into_inner(); + let first = pairs + .next() + .ok_or_else(|| anyhow!("empty binop rule"))?; + let mut left = build_child(first)?; + while let Some(op_pair) = pairs.next() { + let op = match_op(op_pair.as_str()).ok_or_else(|| { + anyhow!("unexpected operator token: {:?}", op_pair.as_str()) + })?; + let right_pair = pairs + .next() + .ok_or_else(|| anyhow!("missing rhs for operator"))?; + let right = build_child(right_pair)?; left = Expr::BinOp(op, Box::new(left), Box::new(right)); } Ok(left) } -fn parse_mul_div(tokens: &[Token], pos: &mut usize) -> Result { - let mut left = parse_pow(tokens, pos)?; - while *pos < tokens.len() { - let op = match &tokens[*pos] { - Token::Star => BinOp::Mul, - Token::Slash => BinOp::Div, - _ => break, - }; - *pos += 1; - let right = parse_pow(tokens, pos)?; - left = Expr::BinOp(op, Box::new(left), Box::new(right)); - } - Ok(left) -} - -fn parse_pow(tokens: &[Token], pos: &mut usize) -> Result { - let base = parse_unary(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::Caret { - *pos += 1; - let exp = parse_unary(tokens, pos)?; - return Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp))); - } - Ok(base) -} - -fn parse_unary(tokens: &[Token], pos: &mut usize) -> Result { - if *pos < tokens.len() && tokens[*pos] == Token::Minus { - *pos += 1; - let e = parse_primary(tokens, pos)?; - return Ok(Expr::UnaryMinus(Box::new(e))); - } - parse_primary(tokens, pos) -} - -fn parse_primary(tokens: &[Token], pos: &mut usize) -> Result { - if *pos >= tokens.len() { - return Err(anyhow!("Unexpected end of expression")); - } - match &tokens[*pos].clone() { - Token::Number(n) => { - *pos += 1; - Ok(Expr::Number(*n)) - } - Token::Ident(name) => { - let name = name.clone(); - *pos += 1; - // Check for function call - let upper = name.to_ascii_uppercase(); - match upper.as_str() { - "SUM" | "AVG" | "MIN" | "MAX" | "COUNT" => { - let func = match upper.as_str() { - "SUM" => AggFunc::Sum, - "AVG" => AggFunc::Avg, - "MIN" => AggFunc::Min, - "MAX" => AggFunc::Max, - "COUNT" => AggFunc::Count, - _ => unreachable!(), - }; - if *pos < tokens.len() && tokens[*pos] == Token::LParen { - *pos += 1; - let inner = parse_add_sub(tokens, pos)?; - // Optional WHERE filter - let filter = if *pos < tokens.len() { - if let Token::Ident(kw) = &tokens[*pos] { - if kw.eq_ignore_ascii_case("WHERE") { - *pos += 1; - let cat = match &tokens[*pos] { - Token::Ident(s) => { - let s = s.clone(); - *pos += 1; - s - } - t => { - return Err(anyhow!( - "Expected category name, got {t:?}" - )); - } - }; - // expect = - if *pos < tokens.len() && tokens[*pos] == Token::Eq { - *pos += 1; - } - let item = match &tokens[*pos] { - Token::Str(s) | Token::Ident(s) => { - let s = s.clone(); - *pos += 1; - s - } - t => return Err(anyhow!("Expected item name, got {t:?}")), - }; - Some(Filter { - category: cat, - item, - }) - } else { - None - } - } else { - None - } - } else { - None - }; - // expect ) - if *pos < tokens.len() && tokens[*pos] == Token::RParen { - *pos += 1; - } else { - return Err(anyhow!("Expected ')' to close aggregate function")); - } - return Ok(Expr::Agg(func, Box::new(inner), filter)); - } - Ok(Expr::Ref(name)) - } - "IF" => { - if *pos < tokens.len() && tokens[*pos] == Token::LParen { - *pos += 1; - let cond = parse_comparison(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::Comma { - *pos += 1; - } - let then = parse_add_sub(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::Comma { - *pos += 1; - } - let else_ = parse_add_sub(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::RParen { - *pos += 1; - } else { - return Err(anyhow!("Expected ')' to close IF(...)")); - } - return Ok(Expr::If(Box::new(cond), Box::new(then), Box::new(else_))); - } - Ok(Expr::Ref(name)) - } - _ => Ok(Expr::Ref(name)), - } - } - Token::LParen => { - *pos += 1; - let e = parse_add_sub(tokens, pos)?; - if *pos < tokens.len() && tokens[*pos] == Token::RParen { - *pos += 1; - } - Ok(e) - } - t => Err(anyhow!("Unexpected token in expression: {t:?}")), - } -} - -fn parse_comparison(tokens: &[Token], pos: &mut usize) -> Result { - let left = parse_add_sub(tokens, pos)?; - if *pos >= tokens.len() { - return Ok(left); - } - let op = match &tokens[*pos] { - Token::Eq => BinOp::Eq, - Token::Ne => BinOp::Ne, - Token::Lt => BinOp::Lt, - Token::Gt => BinOp::Gt, - Token::Le => BinOp::Le, - Token::Ge => BinOp::Ge, - _ => return Ok(left), - }; - *pos += 1; - let right = parse_add_sub(tokens, pos)?; - Ok(Expr::BinOp(op, Box::new(left), Box::new(right))) -} - #[cfg(test)] mod tests { use super::parse_formula; @@ -544,8 +403,8 @@ mod tests { assert_eq!(filter.item, "East"); } - /// Regression: WHERE inside aggregate parens must tokenize correctly. - /// The tokenizer must not merge "Revenue WHERE" into a single identifier. + /// Regression: WHERE inside aggregate parens must parse as the + /// aggregate's inline filter, not as a top-level WHERE clause. #[test] fn parse_sum_with_inline_where_filter() { let f = parse_formula("EastTotal = SUM(Revenue WHERE Region = \"East\")", "Foo").unwrap(); @@ -562,7 +421,6 @@ mod tests { #[test] fn parse_if_with_comparison_operators() { - // Test each comparison operator in an IF expression let f = parse_formula("X = IF(A != 0, A, 1)", "Cat").unwrap(); assert!(matches!(f.expr, Expr::If(_, _, _))); @@ -583,7 +441,6 @@ mod tests { #[test] fn parse_where_with_quoted_string_inside_expression() { - // WHERE inside a formula string with quotes let f = parse_formula("X = Revenue WHERE Region = \"West Coast\"", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "West Coast"); @@ -650,11 +507,10 @@ mod tests { } } - // ── Quoted string in tokenizer ────────────────────────────────────── + // ── Quoted string in WHERE ────────────────────────────────────────── #[test] fn parse_quoted_string_in_where() { - // Quoted strings work in top-level WHERE clauses let f = parse_formula("X = Revenue WHERE Region = \"East\"", "Cat").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "East"); @@ -681,27 +537,21 @@ mod tests { assert!(parse_expr("").is_err()); } + // ── Multi-word identifiers must be pipe-quoted ────────────────────── + #[test] - fn tokenizer_breaks_at_where_keyword() { - use super::tokenize; - let tokens = tokenize("Revenue WHERE Region").unwrap(); - // Should produce 3 tokens: Ident("Revenue"), Ident("WHERE"), Ident("Region") - assert_eq!(tokens.len(), 3, "Expected 3 tokens, got: {tokens:?}"); + fn multi_word_bare_identifier_is_rejected() { + // Multi-word identifiers must be pipe-quoted; bare multi-word fails + // the `bare_name`-compatible grammar rule. + assert!(parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").is_err()); } - // ── Multi-word identifiers ────────────────────────────────────────── + // ── WHERE inside quotes in the expression ─────────────────────────── #[test] - fn parse_multi_word_identifier() { - let f = parse_formula("Total Revenue = Base Revenue + Bonus", "Foo").unwrap(); - assert_eq!(f.target, "Total Revenue"); - } - - // ── WHERE inside quotes in split_where ────────────────────────────── - - #[test] - fn split_where_ignores_where_inside_quotes() { - // WHERE inside quotes should not be treated as a keyword + fn where_inside_quotes_is_not_a_keyword() { + // A filter value containing the literal text "WHERE" is parsed as + // a string, not as a nested WHERE keyword. let f = parse_formula("X = Revenue WHERE Region = \"WHERE\"", "Foo").unwrap(); let filter = f.filter.as_ref().unwrap(); assert_eq!(filter.item, "WHERE"); @@ -773,4 +623,17 @@ mod tests { panic!("Expected SUM with WHERE filter, got: {:?}", f.expr); } } + + // ── Pipe-quoted escape semantics ──────────────────────────────────── + + #[test] + fn pipe_quoted_escape_literal_pipe() { + // \| inside a pipe-quoted identifier is a literal pipe + let f = parse_formula("X = |A\\|B|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A|B"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } } From dba8a5269eb3cc7bbec1ce640737e9367b4224c9 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 04:16:09 -0700 Subject: [PATCH 10/13] refactor(records): use CategoryKind for column filtering, stabilize _Measure position - Filter records-mode columns by CategoryKind instead of string names - Simplify cell fetching: matching_cells already handles empty filters - Sort _Measure to always appear right before Value column Made-with: Cursor --- src/view/layout.rs | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/view/layout.rs b/src/view/layout.rs index 69403cc..37c457b 100644 --- a/src/view/layout.rs +++ b/src/view/layout.rs @@ -1,6 +1,7 @@ use std::rc::Rc; use crate::model::Model; +use crate::model::category::CategoryKind; use crate::model::cell::{CellKey, CellValue}; use crate::view::{Axis, View}; @@ -132,22 +133,12 @@ impl GridLayout { page_coords: Vec<(String, String)>, none_cats: Vec, ) -> Self { - // Filter cells by page_coords - let partial: Vec<(String, String)> = page_coords.clone(); - let mut records: Vec<(CellKey, CellValue)> = if partial.is_empty() { - model - .data - .iter_cells() - .map(|(k, v)| (k, v.clone())) - .collect() - } else { - model - .data - .matching_cells(&partial) - .into_iter() - .map(|(k, v)| (k, v.clone())) - .collect() - }; + let mut records: Vec<(CellKey, CellValue)> = model + .data + .matching_cells(&page_coords) + .into_iter() + .map(|(k, v)| (k, v.clone())) + .collect(); // Sort for deterministic ordering records.sort_by(|a, b| a.0.0.cmp(&b.0.0)); @@ -157,12 +148,22 @@ impl GridLayout { .collect(); // Synthesize col items: one per non-virtual category + "Value" - let cat_names: Vec = model + let mut cat_names: Vec = model .category_names() .into_iter() - .filter(|c| *c != "_Index" && *c != "_Dim") + .filter(|c| { + let kind = model.category(c).map(|cat| cat.kind); + !matches!(kind, Some(CategoryKind::VirtualIndex | CategoryKind::VirtualDim)) + }) .map(String::from) .collect(); + // _Measure goes last among category columns (right before Value) + cat_names.sort_by_key(|c| { + matches!( + model.category(c).map(|cat| cat.kind), + Some(CategoryKind::VirtualMeasure) + ) + }); let mut col_items: Vec = cat_names .iter() .map(|c| AxisEntry::DataItem(vec![c.clone()])) From 5fbc73269fc4df4c2c5c9cf3230199b3c3d51944 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 04:32:14 -0700 Subject: [PATCH 11/13] refactor(formula): trust grammar invariants in parser Refactor the formula parser to assume grammar invariants, replacing Result-based error handling in tree walkers with infallible functions and .expect(GRAMMAR_INVARIANT) calls. This simplification is based on the guarantee that the Pest grammar already validates the input structure. To verify these invariants and ensure correctness, extensive new unit tests and property-based tests using proptest have been added. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- Cargo.lock | 2 + crates/improvise-formula/Cargo.toml | 4 + crates/improvise-formula/src/parser.rs | 771 +++++++++++++++++++------ 3 files changed, 607 insertions(+), 170 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad2f76e..b2565b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -758,6 +758,8 @@ dependencies = [ "anyhow", "pest", "pest_derive", + "pest_meta", + "proptest", "serde", ] diff --git a/crates/improvise-formula/Cargo.toml b/crates/improvise-formula/Cargo.toml index 7782815..f7c7a9c 100644 --- a/crates/improvise-formula/Cargo.toml +++ b/crates/improvise-formula/Cargo.toml @@ -11,3 +11,7 @@ anyhow = "1" pest = "2.8.6" pest_derive = "2.8.6" serde = { version = "1", features = ["derive"] } + +[dev-dependencies] +pest_meta = "2.8.6" +proptest = "1" diff --git a/crates/improvise-formula/src/parser.rs b/crates/improvise-formula/src/parser.rs index 0e7238c..51cb101 100644 --- a/crates/improvise-formula/src/parser.rs +++ b/crates/improvise-formula/src/parser.rs @@ -9,6 +9,11 @@ use super::ast::{AggFunc, BinOp, Expr, Filter, Formula}; #[grammar = "formula.pest"] struct FormulaParser; +/// Message used by `.expect()` calls on invariants that the grammar +/// guarantees. If one of these ever panics, the grammar and the tree +/// walker are out of sync — it's a bug, not a runtime condition. +const GRAMMAR_INVARIANT: &str = "grammar invariant violated: parser out of sync with formula.pest"; + /// Parse a formula string like "Profit = Revenue - Cost" /// or "Tax = Revenue * 0.08 WHERE Region = \"East\"" pub fn parse_formula(raw: &str, target_category: &str) -> Result { @@ -16,221 +21,198 @@ pub fn parse_formula(raw: &str, target_category: &str) -> Result { let formula_pair = FormulaParser::parse(Rule::formula, input) .map_err(|e| anyhow!("{}", e))? .next() - .ok_or_else(|| anyhow!("empty parse result"))?; - build_formula(formula_pair, input, target_category) + .expect(GRAMMAR_INVARIANT); + Ok(build_formula(formula_pair, input, target_category)) } /// Parse a bare expression (no target, no top-level WHERE clause). /// Fails if the input contains trailing tokens after a complete expression. pub fn parse_expr(s: &str) -> Result { let input = s.trim(); - let expr_pair = FormulaParser::parse(Rule::expr_eoi, input) + let expr_eoi_pair = FormulaParser::parse(Rule::expr_eoi, input) .map_err(|e| anyhow!("{}", e))? .next() - .ok_or_else(|| anyhow!("empty parse result"))? - .into_inner() - .next() - .ok_or_else(|| anyhow!("missing expression in expr_eoi"))?; - build_expr(expr_pair) + .expect(GRAMMAR_INVARIANT); + Ok(build_expr(first_inner(expr_eoi_pair))) } // ---- tree walkers ------------------------------------------------------- +// +// Every `build_*` function below operates on a pest Pair that has already +// been validated by the grammar. Invariants like "a `formula` has exactly +// one `target`" or "a `comparison` has an lhs, op, and rhs" are guaranteed +// by pest before the tree walker sees the Pair, so these functions are +// infallible. Any `.expect(GRAMMAR_INVARIANT)` in here represents a bug +// marker — if it ever fires, the grammar and the walker have diverged. -fn build_formula(pair: Pair, raw: &str, target_category: &str) -> Result { +fn build_formula(pair: Pair, raw: &str, target_category: &str) -> Formula { let mut target = None; let mut expr = None; let mut filter = None; for inner in pair.into_inner() { - match inner.as_rule() { - Rule::target => target = Some(inner.as_str().trim().to_string()), - Rule::expr => expr = Some(build_expr(inner)?), - Rule::where_clause => filter = Some(build_filter(inner)?), - Rule::EOI => {} - r => return Err(anyhow!("unexpected rule in formula: {:?}", r)), + let rule = inner.as_rule(); + if rule == Rule::target { + target = Some(inner.as_str().trim().to_string()); + } else if rule == Rule::expr { + expr = Some(build_expr(inner)); + } else if rule == Rule::where_clause { + filter = Some(build_filter(inner)); } + // Rule::EOI and any silent rules are ignored. } - let target = target.ok_or_else(|| anyhow!("missing target in formula"))?; - let expr = expr.ok_or_else(|| anyhow!("missing expression in formula"))?; - Ok(Formula::new(raw, target, target_category, expr, filter)) + Formula::new( + raw, + target.expect(GRAMMAR_INVARIANT), + target_category, + expr.expect(GRAMMAR_INVARIANT), + filter, + ) } -fn build_expr(pair: Pair) -> Result { +fn build_expr(pair: Pair) -> Expr { // expr = { add_expr } - build_add_expr(first_inner(pair, "expr")?) + build_add_expr(first_inner(pair)) } -fn build_add_expr(pair: Pair) -> Result { - fold_left_binop(pair, build_mul_expr, |s| match s { - "+" => Some(BinOp::Add), - "-" => Some(BinOp::Sub), - _ => None, +fn build_add_expr(pair: Pair) -> Expr { + fold_left_binop(pair, build_mul_expr, |s| { + if s == "+" { + BinOp::Add + } else { + // The grammar restricts add_op to "+" | "-". + BinOp::Sub + } }) } -fn build_mul_expr(pair: Pair) -> Result { - fold_left_binop(pair, build_pow_expr, |s| match s { - "*" => Some(BinOp::Mul), - "/" => Some(BinOp::Div), - _ => None, +fn build_mul_expr(pair: Pair) -> Expr { + fold_left_binop(pair, build_pow_expr, |s| { + if s == "*" { + BinOp::Mul + } else { + // The grammar restricts mul_op to "*" | "/". + BinOp::Div + } }) } -fn build_pow_expr(pair: Pair) -> Result { +fn build_pow_expr(pair: Pair) -> Expr { // pow_expr = { unary ~ (pow_op ~ unary)? } let mut pairs = pair.into_inner(); - let base_pair = pairs - .next() - .ok_or_else(|| anyhow!("empty pow_expr"))?; - let base = build_unary(base_pair)?; + let base = build_unary(pairs.next().expect(GRAMMAR_INVARIANT)); match pairs.next() { - None => Ok(base), - Some(op_pair) => { - debug_assert_eq!(op_pair.as_rule(), Rule::pow_op); - let exp_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing exponent in pow_expr"))?; - let exp = build_unary(exp_pair)?; - Ok(Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp))) + None => base, + Some(_pow_op) => { + let exp = build_unary(pairs.next().expect(GRAMMAR_INVARIANT)); + Expr::BinOp(BinOp::Pow, Box::new(base), Box::new(exp)) } } } -fn build_unary(pair: Pair) -> Result { +fn build_unary(pair: Pair) -> Expr { // unary = { unary_minus | primary } - let inner = first_inner(pair, "unary")?; - match inner.as_rule() { - Rule::unary_minus => { - let prim = first_inner(inner, "unary_minus")?; - Ok(Expr::UnaryMinus(Box::new(build_primary(prim)?))) - } - Rule::primary => build_primary(inner), - r => Err(anyhow!("unexpected rule in unary: {:?}", r)), + let inner = first_inner(pair); + if inner.as_rule() == Rule::unary_minus { + Expr::UnaryMinus(Box::new(build_primary(first_inner(inner)))) + } else { + // primary is the only other alternative. + build_primary(inner) } } -fn build_primary(pair: Pair) -> Result { +fn build_primary(pair: Pair) -> Expr { // primary = { number | agg_call | if_expr | paren_expr | ref_expr } - let inner = first_inner(pair, "primary")?; - match inner.as_rule() { - Rule::number => Ok(Expr::Number(inner.as_str().parse()?)), - Rule::agg_call => build_agg_call(inner), - Rule::if_expr => build_if_expr(inner), - Rule::paren_expr => build_expr(first_inner(inner, "paren_expr")?), - Rule::ref_expr => { - let id_pair = first_inner(inner, "ref_expr")?; - Ok(Expr::Ref(identifier_to_string(id_pair))) - } - r => Err(anyhow!("unexpected rule in primary: {:?}", r)), + let inner = first_inner(pair); + let rule = inner.as_rule(); + if rule == Rule::number { + Expr::Number(inner.as_str().parse().expect(GRAMMAR_INVARIANT)) + } else if rule == Rule::agg_call { + build_agg_call(inner) + } else if rule == Rule::if_expr { + build_if_expr(inner) + } else if rule == Rule::paren_expr { + build_expr(first_inner(inner)) + } else { + // ref_expr is the only remaining alternative. + Expr::Ref(identifier_to_string(first_inner(inner))) } } -fn build_agg_call(pair: Pair) -> Result { +fn build_agg_call(pair: Pair) -> Expr { // agg_call = { agg_func ~ "(" ~ expr ~ inline_where? ~ ")" } let mut pairs = pair.into_inner(); - let func_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing agg_func"))?; - let func = parse_agg_func(func_pair.as_str())?; - let expr_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing aggregate argument"))?; - let inner_expr = build_expr(expr_pair)?; - let filter = match pairs.next() { - Some(p) if p.as_rule() == Rule::inline_where => Some(build_filter(p)?), - _ => None, - }; - Ok(Expr::Agg(func, Box::new(inner_expr), filter)) + let func = parse_agg_func(pairs.next().expect(GRAMMAR_INVARIANT).as_str()); + let inner_expr = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + // The only pair after expr (if any) is `inline_where`, so we can + // map it directly without checking the rule variant. + let filter = pairs.next().map(build_filter); + Expr::Agg(func, Box::new(inner_expr), filter) } -fn parse_agg_func(s: &str) -> Result { +fn parse_agg_func(s: &str) -> AggFunc { + // agg_func = { ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" | ^"COUNT" } match s.to_ascii_uppercase().as_str() { - "SUM" => Ok(AggFunc::Sum), - "AVG" => Ok(AggFunc::Avg), - "MIN" => Ok(AggFunc::Min), - "MAX" => Ok(AggFunc::Max), - "COUNT" => Ok(AggFunc::Count), - f => Err(anyhow!("unknown aggregate function: {}", f)), + "SUM" => AggFunc::Sum, + "AVG" => AggFunc::Avg, + "MIN" => AggFunc::Min, + "MAX" => AggFunc::Max, + // COUNT is the only remaining alternative. + _ => AggFunc::Count, } } -fn build_if_expr(pair: Pair) -> Result { +fn build_if_expr(pair: Pair) -> Expr { // if_expr = { ^"IF" ~ "(" ~ comparison ~ "," ~ expr ~ "," ~ expr ~ ")" } let mut pairs = pair.into_inner(); - let cond_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing IF condition"))?; - let cond = build_comparison(cond_pair)?; - let then_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing IF then-branch"))?; - let then_e = build_expr(then_pair)?; - let else_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing IF else-branch"))?; - let else_e = build_expr(else_pair)?; - Ok(Expr::If( - Box::new(cond), - Box::new(then_e), - Box::new(else_e), - )) + let cond = build_comparison(pairs.next().expect(GRAMMAR_INVARIANT)); + let then_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + let else_e = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + Expr::If(Box::new(cond), Box::new(then_e), Box::new(else_e)) } -fn build_comparison(pair: Pair) -> Result { +fn build_comparison(pair: Pair) -> Expr { // comparison = { expr ~ cmp_op ~ expr } let mut pairs = pair.into_inner(); - let lhs_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing comparison lhs"))?; - let lhs = build_expr(lhs_pair)?; - let op_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing comparison operator"))?; - let op = parse_cmp_op(op_pair.as_str())?; - let rhs_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing comparison rhs"))?; - let rhs = build_expr(rhs_pair)?; - Ok(Expr::BinOp(op, Box::new(lhs), Box::new(rhs))) + let lhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + let op = parse_cmp_op(pairs.next().expect(GRAMMAR_INVARIANT).as_str()); + let rhs = build_expr(pairs.next().expect(GRAMMAR_INVARIANT)); + Expr::BinOp(op, Box::new(lhs), Box::new(rhs)) } -fn parse_cmp_op(s: &str) -> Result { +fn parse_cmp_op(s: &str) -> BinOp { + // cmp_op = { "!=" | "<=" | ">=" | "<" | ">" | "=" } match s { - "=" => Ok(BinOp::Eq), - "!=" => Ok(BinOp::Ne), - "<" => Ok(BinOp::Lt), - ">" => Ok(BinOp::Gt), - "<=" => Ok(BinOp::Le), - ">=" => Ok(BinOp::Ge), - o => Err(anyhow!("unknown comparison operator: {}", o)), + "=" => BinOp::Eq, + "!=" => BinOp::Ne, + "<" => BinOp::Lt, + ">" => BinOp::Gt, + "<=" => BinOp::Le, + // ">=" is the only remaining alternative. + _ => BinOp::Ge, } } -fn build_filter(pair: Pair) -> Result { +fn build_filter(pair: Pair) -> Filter { // where_clause / inline_where both have shape: // ^"WHERE" ~ identifier ~ "=" ~ filter_value let mut pairs = pair.into_inner(); - let cat_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing WHERE category"))?; - let category = identifier_to_string(cat_pair); - let val_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing WHERE value"))?; - let item = filter_value_to_string(val_pair); - Ok(Filter { category, item }) + let category = identifier_to_string(pairs.next().expect(GRAMMAR_INVARIANT)); + let item = filter_value_to_string(pairs.next().expect(GRAMMAR_INVARIANT)); + Filter { category, item } } fn filter_value_to_string(pair: Pair) -> String { // filter_value = { string | pipe_quoted | bare_ident } - let inner = pair - .into_inner() - .next() - .expect("filter_value must have an inner pair"); + let inner = first_inner(pair); let s = inner.as_str(); - match inner.as_rule() { - Rule::string => strip_string_quotes(s), - Rule::pipe_quoted => unquote_pipe(s), - _ => s.to_string(), + let rule = inner.as_rule(); + if rule == Rule::string { + strip_string_quotes(s) + } else if rule == Rule::pipe_quoted { + unquote_pipe(s) + } else { + // bare_ident is the only remaining alternative. + s.to_string() } } @@ -257,8 +239,8 @@ fn strip_string_quotes(s: &str) -> String { } /// Strip surrounding pipes and apply backslash escapes: `\|` → `|`, -/// `\\` → `\`, `\n` → newline. Matches the escape semantics documented -/// in src/persistence/improv.pest. +/// `\\` → `\`, `\n` → newline, and any other `\X` is preserved verbatim. +/// Matches the escape semantics documented in src/persistence/improv.pest. fn unquote_pipe(s: &str) -> String { debug_assert!(is_pipe_quoted(s)); let inner = &s[1..s.len() - 1]; @@ -266,15 +248,18 @@ fn unquote_pipe(s: &str) -> String { let mut chars = inner.chars(); while let Some(c) = chars.next() { if c == '\\' { - match chars.next() { - Some('|') => out.push('|'), - Some('\\') => out.push('\\'), - Some('n') => out.push('\n'), - Some(other) => { + // The grammar rule `"\\" ~ ANY` guarantees that a backslash + // inside a pipe-quoted identifier is always followed by another + // character, so `chars.next()` cannot be `None` here. + let escaped = chars.next().expect(GRAMMAR_INVARIANT); + match escaped { + '|' => out.push('|'), + '\\' => out.push('\\'), + 'n' => out.push('\n'), + other => { out.push('\\'); out.push(other); } - None => out.push('\\'), } } else { out.push(c); @@ -285,35 +270,27 @@ fn unquote_pipe(s: &str) -> String { // ---- small helpers ------------------------------------------------------ -fn first_inner<'a>(pair: Pair<'a, Rule>, ctx: &str) -> Result> { - pair.into_inner() - .next() - .ok_or_else(|| anyhow!("empty rule: {}", ctx)) +fn first_inner(pair: Pair<'_, Rule>) -> Pair<'_, Rule> { + pair.into_inner().next().expect(GRAMMAR_INVARIANT) } /// Fold a left-associative binary-operator rule of the shape /// `rule = { child ~ (op ~ child)* }` into a left-leaning BinOp tree. -fn fold_left_binop(pair: Pair, mut build_child: F, match_op: M) -> Result +/// The `match_op` closure is infallible — the grammar guarantees the +/// operator token is one of the expected alternatives. +fn fold_left_binop(pair: Pair, mut build_child: F, match_op: M) -> Expr where - F: FnMut(Pair) -> Result, - M: Fn(&str) -> Option, + F: FnMut(Pair) -> Expr, + M: Fn(&str) -> BinOp, { let mut pairs = pair.into_inner(); - let first = pairs - .next() - .ok_or_else(|| anyhow!("empty binop rule"))?; - let mut left = build_child(first)?; + let mut left = build_child(pairs.next().expect(GRAMMAR_INVARIANT)); while let Some(op_pair) = pairs.next() { - let op = match_op(op_pair.as_str()).ok_or_else(|| { - anyhow!("unexpected operator token: {:?}", op_pair.as_str()) - })?; - let right_pair = pairs - .next() - .ok_or_else(|| anyhow!("missing rhs for operator"))?; - let right = build_child(right_pair)?; + let op = match_op(op_pair.as_str()); + let right = build_child(pairs.next().expect(GRAMMAR_INVARIANT)); left = Expr::BinOp(op, Box::new(left), Box::new(right)); } - Ok(left) + left } #[cfg(test)] @@ -636,4 +613,458 @@ mod tests { panic!("Expected Ref, got: {:?}", f.expr); } } + + #[test] + fn pipe_quoted_escape_double_backslash() { + // \\ inside a pipe-quoted identifier is a literal backslash + let f = parse_formula("X = |A\\\\B|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A\\B"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_escape_newline() { + // \n inside a pipe-quoted identifier is a literal newline + let f = parse_formula("X = |A\\nB|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A\nB"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + #[test] + fn pipe_quoted_unknown_escape_preserved() { + // Any `\X` where X isn't |, \, or n is preserved verbatim as + // backslash-plus-character. The grammar's `"\\" ~ ANY` allows + // any following character; we just don't interpret it. + let f = parse_formula("X = |A\\zB|", "Cat").unwrap(); + if let Expr::Ref(ref s) = f.expr { + assert_eq!(s, "A\\zB"); + } else { + panic!("Expected Ref, got: {:?}", f.expr); + } + } + + // ── Operator precedence and associativity ────────────────────────── + + #[test] + fn mul_binds_tighter_than_add() { + // `A + B * C` must parse as Add(A, Mul(B, C)) + let f = parse_formula("X = A + B * C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Add, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(_))); + assert!(matches!(**rhs, Expr::BinOp(BinOp::Mul, _, _))); + } else { + panic!("Expected Add with Mul on rhs, got: {:?}", f.expr); + } + } + + #[test] + fn pow_binds_tighter_than_mul() { + // `A * B ^ C` must parse as Mul(A, Pow(B, C)) + let f = parse_formula("X = A * B ^ C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Mul, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::Ref(_))); + assert!(matches!(**rhs, Expr::BinOp(BinOp::Pow, _, _))); + } else { + panic!("Expected Mul with Pow on rhs, got: {:?}", f.expr); + } + } + + #[test] + fn subtraction_is_left_associative() { + // `A - B - C` must parse as Sub(Sub(A, B), C), not Sub(A, Sub(B, C)) + let f = parse_formula("X = A - B - C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Sub, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::BinOp(BinOp::Sub, _, _))); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); + } else { + panic!("Expected left-associative Sub, got: {:?}", f.expr); + } + } + + #[test] + fn division_is_left_associative() { + // `A / B / C` must parse as Div(Div(A, B), C) + let f = parse_formula("X = A / B / C", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Div, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::BinOp(BinOp::Div, _, _))); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "C")); + } else { + panic!("Expected left-associative Div, got: {:?}", f.expr); + } + } + + #[test] + fn unary_minus_before_pow() { + // `-A ^ B` — hand-rolled parser parses this as UnaryMinus(Ref(A)) + // followed by nothing because pow is inside unary's recursion. + // My grammar: unary = unary_minus | primary, unary_minus = "-" ~ primary. + // So `-A` is UnaryMinus(Ref(A)); `^B` can't attach because pow_expr + // sits OUTSIDE unary. Result: pow_expr(unary_minus(A)) — no pow + // because the optional pow_op doesn't have another unary to bind to. + // Verify: `-A ^ B` parses to BinOp(Pow, UnaryMinus(A), B). + let f = parse_formula("X = -A ^ B", "Cat").unwrap(); + if let Expr::BinOp(BinOp::Pow, lhs, rhs) = &f.expr { + assert!(matches!(**lhs, Expr::UnaryMinus(_))); + assert!(matches!(**rhs, Expr::Ref(ref s) if s == "B")); + } else { + panic!("Expected Pow(UnaryMinus, Ref), got: {:?}", f.expr); + } + } + + // ── Number literal variants ──────────────────────────────────────── + + #[test] + fn integer_literal() { + let f = parse_formula("X = 42", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 42.0).abs() < 1e-10)); + } + + #[test] + fn zero_literal() { + let f = parse_formula("X = 0", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if n == 0.0)); + } + + #[test] + fn decimal_literal_without_integer_part() { + let f = parse_formula("X = .5", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 0.5).abs() < 1e-10)); + } + + #[test] + fn decimal_literal_with_trailing_dot() { + let f = parse_formula("X = 5.", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 5.0).abs() < 1e-10)); + } + + #[test] + fn decimal_literal_with_integer_and_fraction() { + let f = parse_formula("X = 123.456", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Number(n) if (n - 123.456).abs() < 1e-10)); + } + + // ── Filter value variants ────────────────────────────────────────── + + #[test] + fn where_with_bare_identifier_value() { + // filter_value = { string | pipe_quoted | bare_ident } — exercise + // the bare_ident branch. + let f = parse_formula("X = Revenue WHERE Region = East", "Cat").unwrap(); + let filter = f.filter.as_ref().unwrap(); + assert_eq!(filter.category, "Region"); + assert_eq!(filter.item, "East"); + } + + // ── Nested constructs ────────────────────────────────────────────── + + #[test] + fn nested_sum_aggregate() { + // Nested aggregates — outer SUM wrapping an inner SUM. + let f = parse_formula("X = SUM(SUM(Revenue))", "Cat").unwrap(); + if let Expr::Agg(AggFunc::Sum, outer_inner, None) = &f.expr { + assert!(matches!(**outer_inner, Expr::Agg(AggFunc::Sum, _, None))); + } else { + panic!("Expected nested SUM aggregates, got: {:?}", f.expr); + } + } + + #[test] + fn deeply_nested_parens() { + // Parens should flatten away without affecting the AST. + let f = parse_formula("X = (((((A + B)))))", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + #[test] + fn nested_if_expression() { + // IF in the then-branch of another IF. + let f = parse_formula("X = IF(A > B, IF(C > D, 1, 2), 3)", "Cat").unwrap(); + if let Expr::If(_, then_e, else_e) = &f.expr { + assert!(matches!(**then_e, Expr::If(_, _, _))); + assert!(matches!(**else_e, Expr::Number(n) if n == 3.0)); + } else { + panic!("Expected outer IF, got: {:?}", f.expr); + } + } + + // ── Whitespace tolerance ─────────────────────────────────────────── + + #[test] + fn tolerates_tabs_between_tokens() { + let f = parse_formula("X\t=\tA\t+\tB", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + #[test] + fn tolerates_extra_spaces_between_tokens() { + let f = parse_formula("X = A + B", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + #[test] + fn tolerates_leading_and_trailing_whitespace() { + let f = parse_formula(" X = A + B ", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::BinOp(BinOp::Add, _, _))); + } + + // ── Case insensitivity of keywords ───────────────────────────────── + + #[test] + fn aggregate_function_is_case_insensitive() { + // The grammar uses ^"SUM" which is case-insensitive. + let f = parse_formula("X = sum(Revenue)", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); + + let f = parse_formula("X = Sum(Revenue)", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::Agg(AggFunc::Sum, _, _))); + } + + #[test] + fn if_keyword_is_case_insensitive() { + let f = parse_formula("X = if(A > B, 1, 0)", "Cat").unwrap(); + assert!(matches!(f.expr, Expr::If(_, _, _))); + } + + #[test] + fn where_keyword_is_case_insensitive() { + let f = parse_formula("X = Revenue where Region = \"East\"", "Cat").unwrap(); + assert!(f.filter.is_some()); + } + + // ── Target variants ──────────────────────────────────────────────── + + #[test] + fn target_with_underscore_and_hyphen() { + let f = parse_formula("my_target-name = A", "Cat").unwrap(); + assert_eq!(f.target, "my_target-name"); + } + + #[test] + fn pipe_quoted_target_preserves_pipes() { + let f = parse_formula("|My Target| = A", "Cat").unwrap(); + assert_eq!(f.target, "|My Target|"); + } +} + +// ---- grammar-driven proptests ------------------------------------------ +// +// Uses pest_meta to read formula.pest at test time and walks the grammar +// AST to generate random valid formulas. Mirrors the pattern used by the +// persistence parser tests in src/persistence/mod.rs. + +#[cfg(test)] +mod generator { + use pest_meta::ast::{Expr, RuleType}; + use pest_meta::parser; + use proptest::prelude::*; + use std::collections::HashMap; + + /// Parse formula.pest and return rules keyed by name. + fn load_grammar() -> HashMap { + let grammar = include_str!("formula.pest"); + let pairs = parser::parse(parser::Rule::grammar_rules, grammar) + .unwrap_or_else(|e| panic!("Bad grammar: {e}")); + let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}")); + rules + .into_iter() + .map(|r| (r.name.clone(), (r.ty, r.expr))) + .collect() + } + + /// Recursive string generator driven by a pest `Expr`. `choices` is + /// consumed left-to-right at every decision point; the `atomic` flag + /// controls whether sequences insert a space between their children, + /// so that non-atomic grammar rules produce whitespace-separated + /// tokens that pest's implicit WHITESPACE handling accepts. + pub struct Gen<'g> { + rules: &'g HashMap, + choices: Vec, + pos: usize, + } + + impl<'g> Gen<'g> { + pub fn new(rules: &'g HashMap, choices: Vec) -> Self { + Self { + rules, + choices, + pos: 0, + } + } + + fn pick(&mut self) -> u8 { + let v = self.choices.get(self.pos).copied().unwrap_or(0); + self.pos += 1; + v + } + + fn emit(&mut self, expr: &Expr, out: &mut String, atomic: bool) { + match expr { + Expr::Str(s) => out.push_str(s), + Expr::Insens(s) => out.push_str(s), + Expr::Range(lo, hi) => { + let lo = lo.chars().next().unwrap() as u32; + let hi = hi.chars().next().unwrap() as u32; + let range = hi - lo + 1; + let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap(); + out.push(ch); + } + Expr::Ident(name) => self.emit_ident(name, out, atomic), + Expr::Seq(a, b) => { + self.emit(a, out, atomic); + if !atomic { + out.push(' '); + } + self.emit(b, out, atomic); + } + Expr::Choice(a, b) => { + let mut alts: Vec<&Expr> = vec![a.as_ref()]; + let mut cur = b.as_ref(); + while let Expr::Choice(l, r) = cur { + alts.push(l.as_ref()); + cur = r.as_ref(); + } + alts.push(cur); + let idx = self.pick() as usize % alts.len(); + self.emit(alts[idx], out, atomic); + } + Expr::Opt(inner) => { + // ~66% chance of emitting + if !self.pick().is_multiple_of(3) { + self.emit(inner, out, atomic); + } + } + Expr::Rep(inner) => { + let count = self.pick() % 4; + for i in 0..count { + if i > 0 && !atomic { + out.push(' '); + } + self.emit(inner, out, atomic); + } + } + Expr::RepOnce(inner) => { + let count = 1 + self.pick() % 3; + for i in 0..count { + if i > 0 && !atomic { + out.push(' '); + } + self.emit(inner, out, atomic); + } + } + Expr::NegPred(_) | Expr::PosPred(_) => { + // Lookaheads don't emit output. + } + _ => { + // Skip unsupported expressions. + } + } + } + + fn emit_ident(&mut self, name: &str, out: &mut String, atomic: bool) { + match name { + "ANY" | "ASCII_ALPHA" => { + out.push((b'a' + self.pick() % 26) as char); + } + "ASCII_ALPHANUMERIC" => { + if self.pick().is_multiple_of(2) { + out.push((b'a' + self.pick() % 26) as char); + } else { + out.push((b'0' + self.pick() % 10) as char); + } + } + "ASCII_DIGIT" => { + out.push((b'0' + self.pick() % 10) as char); + } + "NEWLINE" => out.push('\n'), + "SOI" | "EOI" => {} + _ => { + if let Some((ty, inner)) = self.rules.get(name).cloned() { + let inner_atomic = atomic + || matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); + self.emit(&inner, out, inner_atomic); + } + } + } + } + + pub fn generate(&mut self, rule_name: &str) -> String { + let mut out = String::new(); + if let Some((ty, expr)) = self.rules.get(rule_name).cloned() { + let atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic); + self.emit(&expr, &mut out, atomic); + } + out + } + } + + /// Proptest strategy: generate a random valid formula by walking the + /// grammar's `formula` rule. + pub fn formula_string() -> impl Strategy { + prop::collection::vec(any::(), 32..=128).prop_map(|choices| { + let rules = load_grammar(); + Gen::new(&rules, choices).generate("formula") + }) + } + + /// Proptest strategy: generate a random valid standalone expression. + pub fn expr_string() -> impl Strategy { + prop::collection::vec(any::(), 32..=128).prop_map(|choices| { + let rules = load_grammar(); + Gen::new(&rules, choices).generate("expr_eoi") + }) + } +} + +#[cfg(test)] +mod grammar_prop_tests { + use super::generator; + use super::{parse_expr, parse_formula}; + use proptest::prelude::*; + + proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Every generator-produced formula parses without error. + #[test] + fn generated_formula_parses(formula in generator::formula_string()) { + let result = parse_formula(&formula, "Cat"); + prop_assert!( + result.is_ok(), + "Generated formula failed to parse:\n{}\nError: {}", + formula, + result.unwrap_err() + ); + } + + /// Every generator-produced standalone expression parses. + #[test] + fn generated_expr_parses(expr in generator::expr_string()) { + let result = parse_expr(&expr); + prop_assert!( + result.is_ok(), + "Generated expression failed to parse:\n{}\nError: {}", + expr, + result.unwrap_err() + ); + } + + /// Parsing the same input twice is deterministic — the debug + /// representation of the resulting AST is identical. + #[test] + fn parse_is_deterministic(formula in generator::formula_string()) { + let r1 = parse_formula(&formula, "Cat"); + let r2 = parse_formula(&formula, "Cat"); + prop_assume!(r1.is_ok() && r2.is_ok()); + let f1 = r1.unwrap(); + let f2 = r2.unwrap(); + prop_assert_eq!(&f1.target, &f2.target); + prop_assert_eq!(format!("{:?}", f1.expr), format!("{:?}", f2.expr)); + } + } } From 709f2df11fdf38561b439971eb435a0935152478 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 04:32:14 -0700 Subject: [PATCH 12/13] fix(model): initialize virtual categories with Axis::None Ensure that virtual categories (_Index, _Dim, _Measure) are registered in the default view with Axis::None. This prevents potential panics when calling axis_of on these categories and allows users to move them to a specific axis manually. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf) --- src/model/types.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/model/types.rs b/src/model/types.rs index 4c44360..deca012 100644 --- a/src/model/types.rs +++ b/src/model/types.rs @@ -61,15 +61,10 @@ impl Model { measure_agg: HashMap::new(), formula_cache: HashMap::new(), }; - // Add virtuals to existing views (default view). - // Start in records mode; on_category_added will reclaim Row/Column - // for the first two regular categories. for view in m.views.values_mut() { view.on_category_added("_Index"); view.on_category_added("_Dim"); view.on_category_added("_Measure"); - view.set_axis("_Index", crate::view::Axis::Row); - view.set_axis("_Dim", crate::view::Axis::Column); } m } @@ -866,6 +861,17 @@ mod model_tests { let _ = m.active_view(); } + /// improvise-kos: virtual categories must be registered in the default view + /// at Axis::None so they don't panic on axis_of and can be moved by the user. + #[test] + fn new_model_registers_virtuals_in_default_view() { + let m = Model::new("Test"); + let v = m.active_view(); + assert_eq!(v.axis_of("_Index"), Axis::None); + assert_eq!(v.axis_of("_Dim"), Axis::None); + assert_eq!(v.axis_of("_Measure"), Axis::None); + } + #[test] fn add_category_creates_entry() { let mut m = Model::new("Test"); From 6d4b19a94069df9f8c9b7ab0adf8234318aade81 Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Wed, 15 Apr 2026 04:37:06 -0700 Subject: [PATCH 13/13] fix(model): default _Measure to Page axis, skip empty page categories (improvise-kos) Virtual categories _Index and _Dim now default to None on new models instead of being forced onto Row/Column. _Measure defaults to Page (the natural home for measure filtering). Fixed page_coords builder to skip Page categories with no items/selection, preventing empty-string contamination of cell keys. Made-with: Cursor --- src/model/types.rs | 5 +++-- src/view/layout.rs | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/model/types.rs b/src/model/types.rs index deca012..bc1ca4f 100644 --- a/src/model/types.rs +++ b/src/model/types.rs @@ -65,6 +65,7 @@ impl Model { view.on_category_added("_Index"); view.on_category_added("_Dim"); view.on_category_added("_Measure"); + view.set_axis("_Measure", crate::view::Axis::Page); } m } @@ -869,7 +870,7 @@ mod model_tests { let v = m.active_view(); assert_eq!(v.axis_of("_Index"), Axis::None); assert_eq!(v.axis_of("_Dim"), Axis::None); - assert_eq!(v.axis_of("_Measure"), Axis::None); + assert_eq!(v.axis_of("_Measure"), Axis::Page); } #[test] @@ -1950,7 +1951,7 @@ mod five_category { assert_eq!(v.axis_of("Product"), Axis::Column); assert_eq!(v.axis_of("Channel"), Axis::Page); assert_eq!(v.axis_of("Time"), Axis::Page); - assert_eq!(v.axis_of("_Measure"), Axis::None); + assert_eq!(v.axis_of("_Measure"), Axis::Page); } #[test] diff --git a/src/view/layout.rs b/src/view/layout.rs index 37c457b..a549b08 100644 --- a/src/view/layout.rs +++ b/src/view/layout.rs @@ -93,14 +93,13 @@ impl GridLayout { let page_coords = page_cats .iter() - .map(|cat| { + .filter_map(|cat| { let items: Vec = model.effective_item_names(cat); let sel = view .page_selection(cat) .map(String::from) - .or_else(|| items.first().cloned()) - .unwrap_or_default(); - (cat.clone(), sel) + .or_else(|| items.first().cloned())?; + Some((cat.clone(), sel)) }) .collect(); @@ -594,6 +593,9 @@ mod tests { ]), CellValue::Number(50.0), ); + // Records mode setup: _Measure should not filter + m.active_view_mut() + .set_axis("_Measure", Axis::None); m }