From 47ad9e603242f1cc460f731e1bbb6d14fb16112f Mon Sep 17 00:00:00 2001 From: Edward Langley Date: Tue, 9 Jun 2026 21:43:13 -0700 Subject: [PATCH] fix(io): improve persistence robustness and CSV compliance Improved numeric parsing in Markdown files to surface errors instead of defaulting to 0.0. `export_csv` now uses `csv::Writer` to ensure RFC 4180 compliance (e.g., quoting fields containing commas). `export_csv` now correctly respects the view's number format. Add regression tests for numeric error handling and RFC 4180 compliance. Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL) --- crates/improvise-io/src/persistence/mod.rs | 134 ++++++++++++++++++--- 1 file changed, 115 insertions(+), 19 deletions(-) diff --git a/crates/improvise-io/src/persistence/mod.rs b/crates/improvise-io/src/persistence/mod.rs index 17551d0..ee7e32c 100644 --- a/crates/improvise-io/src/persistence/mod.rs +++ b/crates/improvise-io/src/persistence/mod.rs @@ -106,6 +106,17 @@ fn format_number(n: f64) -> String { } } +/// Convert a token matched by the grammar's `number` rule into a cell value. +/// +/// The grammar only admits tokens that `f64::from_str` accepts (overflow +/// saturates to ±inf), but if the two grammars ever drift the mismatch must +/// surface — never silently store a wrong number. +fn parse_number_cell(s: &str) -> CellValue { + s.parse() + .map(CellValue::Number) + .unwrap_or_else(|_| CellValue::Error(format!("invalid number: {s}"))) +} + // ── File I/O ───────────────────────────────────────────────────────────────── fn is_gzip(path: &Path) -> bool { @@ -430,9 +441,7 @@ pub fn parse_md(text: &str) -> Result { .collect::>()?; let value = match value_pair.as_rule() { - Rule::number => { - CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0)) - } + Rule::number => parse_number_cell(value_pair.as_str()), Rule::pipe_quoted => { let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?; CellValue::Text(unescape_pipe(inner.as_str())) @@ -586,8 +595,9 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<( let layout = GridLayout::new(&workbook.model, view); let model = &workbook.model; + let (fmt_comma, fmt_decimals) = crate::format::parse_number_format(&view.number_format); - let mut out = String::new(); + let mut wtr = csv::WriterBuilder::new().flexible(true).from_path(path)?; // Header row let row_header = layout.row_cats.join("/"); @@ -601,31 +611,28 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<( } else { format!("{} ({})", row_header, page_label.join(", ")) }; + let mut header: Vec = Vec::new(); if !header_prefix.is_empty() { - out.push_str(&header_prefix); - out.push(','); + header.push(header_prefix); } - let col_labels: Vec = (0..layout.col_count()) - .map(|ci| layout.col_label(ci)) - .collect(); - out.push_str(&col_labels.join(",")); - out.push('\n'); + header.extend((0..layout.col_count()).map(|ci| layout.col_label(ci))); + wtr.write_record(&header)?; // Data rows for ri in 0..layout.row_count() { + let mut record: Vec = Vec::new(); let row_label = layout.row_label(ri); if !row_label.is_empty() { - out.push_str(&row_label); - out.push(','); + record.push(row_label); } - let row_values: Vec = (0..layout.col_count()) - .map(|ci| layout.display_text(model, ri, ci, false, 0)) - .collect(); - out.push_str(&row_values.join(",")); - out.push('\n'); + record.extend( + (0..layout.col_count()) + .map(|ci| layout.display_text(model, ri, ci, fmt_comma, fmt_decimals)), + ); + wtr.write_record(&record)?; } - std::fs::write(path, out)?; + wtr.flush()?; Ok(()) } @@ -1195,6 +1202,46 @@ Type=Food = 42 ); } + /// Bug improvise-1cz: export_csv joined labels/values with bare commas + /// (no RFC 4180 quoting) and ignored the view's number format. With a + /// comma-grouping format, a value like 1200 renders as "1,200" and a + /// label like "Smith, Jr." splits into two fields, corrupting every row. + #[test] + fn export_csv_quotes_commas_per_rfc4180() { + let mut m = Workbook::new("T"); + m.add_category("Type").unwrap(); + m.model.category_mut("Type").unwrap().add_item("Smith, Jr."); + m.add_category("Month").unwrap(); + m.model.category_mut("Month").unwrap().add_item("Jan"); + m.model.set_cell( + coord(&[("Type", "Smith, Jr."), ("Month", "Jan")]), + CellValue::Number(1200.0), + ); + m.active_view_mut().number_format = ",.2f".to_string(); + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("export.csv"); + super::export_csv(&m, "Default", &path).unwrap(); + + let mut rdr = csv::ReaderBuilder::new() + .has_headers(false) + .from_path(&path) + .unwrap(); + let records: Vec = + rdr.records().collect::>().unwrap(); + // Header + one data row, each with exactly 2 fields (label + value) + assert_eq!(records.len(), 2, "expected header + 1 row: {records:?}"); + for r in &records { + assert_eq!(r.len(), 2, "comma split a field: {r:?}"); + } + assert_eq!(&records[1][0], "Smith, Jr."); + assert!( + records[1][1].starts_with("1,200"), + "expected comma-grouped value, got: {:?}", + &records[1][1] + ); + } + #[test] fn export_csv_unknown_view_returns_error() { let m = two_cat_model(); @@ -1975,6 +2022,55 @@ mod parser_edge_cases { assert_eq!(m.model.name, "MyModel"); } + /// Bug improvise-4yc: the `Rule::number` arm used + /// `as_str().parse().unwrap_or(0.0)`, so any numeric token the grammar + /// admitted but `f64::from_str` rejected would silently become + /// Number(0.0). The conversion must surface a CellValue::Error naming the + /// offending text instead of fabricating a zero. + #[test] + fn unparseable_number_token_yields_error_not_zero() { + // No token the current grammar admits actually fails f64 parsing + // (overflow saturates to inf), so exercise the helper directly with + // the forms the grammar could plausibly drift toward admitting. + for bad in ["3.14.15", "1.5e", "--5", ""] { + let got = super::parse_number_cell(bad); + match got { + CellValue::Error(msg) => { + assert!(msg.contains(bad) || bad.is_empty(), "error should name {bad:?}: {msg}") + } + other => panic!("{bad:?} must not produce {other:?}"), + } + } + } + + /// Bug improvise-4yc (grammar routing): malformed numeric forms in a data + /// line must never silently load as Number(0.0). The grammar routes them + /// either to bare_value (Text) or to a parse error that names the text. + #[test] + fn malformed_numbers_in_data_never_silently_zero() { + // "--5" fails the number rule entirely → bare_value → Text + let text = "v2025-04-09\n# T\n## Data\nType=Food = --5\n"; + let m = parse_md(text).unwrap(); + assert_eq!( + m.model.get_cell(&coord(&[("Type", "Food")])), + Some(&CellValue::Text("--5".into())) + ); + + // "3.14.15" — number matches the "3.14" prefix and commits, the rest + // fails → whole-file parse error whose message shows the line + let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n"; + let err = parse_md(text).unwrap_err().to_string(); + assert!(err.contains("3.14.15"), "error must name the text: {err}"); + + // "1e999" is admitted; f64 parsing saturates to +inf, not 0.0 + let text = "v2025-04-09\n# T\n## Data\nType=Food = 1e999\n"; + let m = parse_md(text).unwrap(); + assert_eq!( + m.model.get_cell(&coord(&[("Type", "Food")])), + Some(&CellValue::Number(f64::INFINITY)) + ); + } + #[test] fn parse_data_without_value() { // Malformed data line: no " = " separator — pest rejects it