diff --git a/crates/improvise-io/src/persistence/improv.pest b/crates/improvise-io/src/persistence/improv.pest index fcbf4ac..a136b01 100644 --- a/crates/improvise-io/src/persistence/improv.pest +++ b/crates/improvise-io/src/persistence/improv.pest @@ -58,10 +58,17 @@ formulas_section = { formula_line* } +// `- raw expression [Category]` — the trailing `[Category]` suffix is +// optional (defaults to _Measure) and its name follows the usual quoting +// rules. `formula_raw` stops where a suffix-then-newline (or bare newline) +// begins, so pipe-quoted identifiers containing " [" stay in the raw text. formula_line = { - "- " ~ rest_of_line ~ NEWLINE ~ blank_lines + "- " ~ formula_raw ~ formula_target? ~ NEWLINE ~ blank_lines } +formula_target = _{ " "* ~ "[" ~ name ~ "]" } +formula_raw = @{ (!(formula_target? ~ NEWLINE) ~ ANY)+ } + // ── Data ───────────────────────────────────────────────────────────────────── data_section = { diff --git a/crates/improvise-io/src/persistence/mod.rs b/crates/improvise-io/src/persistence/mod.rs index ee7e32c..4bc26b6 100644 --- a/crates/improvise-io/src/persistence/mod.rs +++ b/crates/improvise-io/src/persistence/mod.rs @@ -224,7 +224,7 @@ pub fn format_md(workbook: &Workbook) -> String { if f.target_category == "_Measure" { w!(out, "- {}", f.raw); } else { - w!(out, "- {} [{}]", f.raw, f.target_category); + w!(out, "- {} [{}]", f.raw, quote_name(&f.target_category)); } } } @@ -302,7 +302,17 @@ pub fn parse_md(text: &str) -> Result { use pest::iterators::{Pair, Pairs}; let file = ImprovParser::parse(Rule::file, text) - .map_err(|e| anyhow::anyhow!("Parse error: {e}"))? + .map_err(|e| { + use pest::error::LineColLocation; + let (line, col) = match e.line_col { + LineColLocation::Pos(p) | LineColLocation::Span(p, _) => p, + }; + let line_text = text.lines().nth(line - 1).unwrap_or("").trim_end(); + anyhow::anyhow!( + "Parse error at line {line}, column {col}: {line_text}\n ({})", + e.variant.message() + ) + })? .next() .ok_or_else(|| anyhow::anyhow!("Empty parse result"))?; @@ -410,19 +420,18 @@ pub fn parse_md(text: &str) -> Result { Rule::formulas_section => { for fl in pair.into_inner() { if fl.as_rule() == Rule::formula_line { - let raw = first_str(fl)?; - if let Some(i) = raw.rfind(" [") - && raw.ends_with(']') - { - formulas.push(( - raw[..i].to_string(), - raw[i + 2..raw.len() - 1].to_string(), - )); - continue; - } + let mut parts = fl.into_inner(); + let raw = next(&mut parts, "formula_line")? + .as_str() + .trim() + .to_string(); // No [Category] suffix — default to _Measure - if !raw.is_empty() && raw.contains('=') { - formulas.push((raw, "_Measure".to_string())); + let category = match parts.next() { + Some(name_pair) => extract_name(name_pair)?, + None => "_Measure".to_string(), + }; + if raw.contains('=') { + formulas.push((raw, category)); } } } @@ -904,6 +913,59 @@ mod tests { assert_eq!(f.target_category, "Type"); } + /// Bug improvise-zgb (b): the formulas walker located the `[Category]` + /// suffix via `raw.rfind(" [")`, so a hand-written line with no space + /// before the bracket (`- Total = Food + Gas[Type]`) was not recognized + /// and the formula was silently retargeted to _Measure. The suffix is now + /// part of the grammar (`formula_line`) and walked structurally. + #[test] + fn parse_md_formula_category_suffix_without_space() { + let text = "v2025-04-09\n# T\n\ + ## Category: Type\n\ + - Food, Gas, Total\n\ + ## Formulas\n\ + - Total = Food + Gas[Type]\n"; + let m = parse_md(text).unwrap(); + let f = &m.model.formulas()[0]; + assert_eq!(f.raw, "Total = Food + Gas"); + assert_eq!(f.target_category, "Type"); + } + + /// Bug improvise-zgb (a): `rfind(" [")` mis-split a suffix whose category + /// name itself contains " [" — the writer emitted it unquoted + /// (`[Margin [%]]`) and the parser then split inside the name, producing + /// garbage raw text and category "%]". Category names in the suffix are + /// now pipe-quoted on write and parsed via the grammar's `name` rule. + #[test] + fn parse_md_round_trips_formula_category_with_special_chars() { + let mut m = two_cat_model(); + m.add_category("Margin [%]").unwrap(); + m.model + .category_mut("Margin [%]") + .unwrap() + .add_item("Total"); + m.model + .add_formula(parse_formula("Total = Food + Gas", "Margin [%]").unwrap()); + let m2 = parse_md(&format_md(&m)).unwrap(); + let f = &m2.model.formulas()[0]; + assert_eq!(f.raw, "Total = Food + Gas"); + assert_eq!(f.target_category, "Margin [%]"); + } + + /// Companion to improvise-zgb: a formula *ending* in a pipe-quoted + /// identifier that contains " [" must not have the quoted name mistaken + /// for a category suffix — it stays a _Measure formula with intact raw. + #[test] + fn parse_md_formula_ending_in_bracketed_pipe_name_keeps_raw() { + let text = "v2025-04-09\n# T\n\ + ## Formulas\n\ + - Total = Cost + |Revenue [USD]|\n"; + let m = parse_md(text).unwrap(); + let f = &m.model.formulas()[0]; + assert_eq!(f.raw, "Total = Cost + |Revenue [USD]|"); + assert_eq!(f.target_category, "_Measure"); + } + #[test] fn parse_md_round_trips_hidden_item() { let _ = two_cat_model(); @@ -2071,6 +2133,21 @@ mod parser_edge_cases { ); } + /// Bug improvise-6kj: parse failures were wrapped as `Parse error: {e}`, + /// leaning on pest's raw rendering instead of stating the location in + /// plain words. The message must name the line number and quote the + /// offending line so a user can find the problem in a hand-edited file. + #[test] + fn parse_error_names_line_number_and_contents() { + let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n"; + let err = parse_md(text).unwrap_err().to_string(); + assert!(err.contains("line 4"), "missing line number: {err}"); + assert!( + err.contains("Type=Food = 3.14.15"), + "missing line contents: {err}" + ); + } + #[test] fn parse_data_without_value() { // Malformed data line: no " = " separator — pest rejects it