fix(io): improve persistence robustness and CSV compliance

Improved numeric parsing in Markdown files to surface errors instead of defaulting to 0.0. `export_csv` now uses `csv::Writer` to ensure RFC 4180 compliance (e.g., quoting fields containing commas). `export_csv` now correctly respects the view's number format. Add regression tests for numeric error handling and RFC 4180 compliance. Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
2026-06-09 21:43:13 -07:00
parent 77a5124d16
commit 47ad9e6032
1 changed files with 115 additions and 19 deletions
@@ -106,6 +106,17 @@ fn format_number(n: f64) -> String {
    }
 }

+/// Convert a token matched by the grammar's `number` rule into a cell value.
+///
+/// The grammar only admits tokens that `f64::from_str` accepts (overflow
+/// saturates to ±inf), but if the two grammars ever drift the mismatch must
+/// surface — never silently store a wrong number.
+fn parse_number_cell(s: &str) -> CellValue {
+    s.parse()
+        .map(CellValue::Number)
+        .unwrap_or_else(|_| CellValue::Error(format!("invalid number: {s}")))
+}
+
 // ── File I/O ─────────────────────────────────────────────────────────────────

 fn is_gzip(path: &Path) -> bool {
@@ -430,9 +441,7 @@ pub fn parse_md(text: &str) -> Result<Workbook> {
                            .collect::<Result<_>>()?;

                        let value = match value_pair.as_rule() {
-                            Rule::number => {
-                                CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0))
-                            }
+                            Rule::number => parse_number_cell(value_pair.as_str()),
                            Rule::pipe_quoted => {
                                let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
                                CellValue::Text(unescape_pipe(inner.as_str()))
@@ -586,8 +595,9 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(

    let layout = GridLayout::new(&workbook.model, view);
    let model = &workbook.model;
+    let (fmt_comma, fmt_decimals) = crate::format::parse_number_format(&view.number_format);

-    let mut out = String::new();
+    let mut wtr = csv::WriterBuilder::new().flexible(true).from_path(path)?;

    // Header row
    let row_header = layout.row_cats.join("/");
@@ -601,31 +611,28 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
    } else {
        format!("{} ({})", row_header, page_label.join(", "))
    };
+    let mut header: Vec<String> = Vec::new();
    if !header_prefix.is_empty() {
-        out.push_str(&header_prefix);
-        out.push(',');
+        header.push(header_prefix);
    }
-    let col_labels: Vec<String> = (0..layout.col_count())
-        .map(|ci| layout.col_label(ci))
-        .collect();
-    out.push_str(&col_labels.join(","));
-    out.push('\n');
+    header.extend((0..layout.col_count()).map(|ci| layout.col_label(ci)));
+    wtr.write_record(&header)?;

    // Data rows
    for ri in 0..layout.row_count() {
+        let mut record: Vec<String> = Vec::new();
        let row_label = layout.row_label(ri);
        if !row_label.is_empty() {
-            out.push_str(&row_label);
-            out.push(',');
+            record.push(row_label);
        }
-        let row_values: Vec<String> = (0..layout.col_count())
-            .map(|ci| layout.display_text(model, ri, ci, false, 0))
-            .collect();
-        out.push_str(&row_values.join(","));
-        out.push('\n');
+        record.extend(
+            (0..layout.col_count())
+                .map(|ci| layout.display_text(model, ri, ci, fmt_comma, fmt_decimals)),
+        );
+        wtr.write_record(&record)?;
    }

-    std::fs::write(path, out)?;
+    wtr.flush()?;
    Ok(())
 }

@@ -1195,6 +1202,46 @@ Type=Food = 42
        );
    }

+    /// Bug improvise-1cz: export_csv joined labels/values with bare commas
+    /// (no RFC 4180 quoting) and ignored the view's number format. With a
+    /// comma-grouping format, a value like 1200 renders as "1,200" and a
+    /// label like "Smith, Jr." splits into two fields, corrupting every row.
+    #[test]
+    fn export_csv_quotes_commas_per_rfc4180() {
+        let mut m = Workbook::new("T");
+        m.add_category("Type").unwrap();
+        m.model.category_mut("Type").unwrap().add_item("Smith, Jr.");
+        m.add_category("Month").unwrap();
+        m.model.category_mut("Month").unwrap().add_item("Jan");
+        m.model.set_cell(
+            coord(&[("Type", "Smith, Jr."), ("Month", "Jan")]),
+            CellValue::Number(1200.0),
+        );
+        m.active_view_mut().number_format = ",.2f".to_string();
+
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("export.csv");
+        super::export_csv(&m, "Default", &path).unwrap();
+
+        let mut rdr = csv::ReaderBuilder::new()
+            .has_headers(false)
+            .from_path(&path)
+            .unwrap();
+        let records: Vec<csv::StringRecord> =
+            rdr.records().collect::<Result<_, _>>().unwrap();
+        // Header + one data row, each with exactly 2 fields (label + value)
+        assert_eq!(records.len(), 2, "expected header + 1 row: {records:?}");
+        for r in &records {
+            assert_eq!(r.len(), 2, "comma split a field: {r:?}");
+        }
+        assert_eq!(&records[1][0], "Smith, Jr.");
+        assert!(
+            records[1][1].starts_with("1,200"),
+            "expected comma-grouped value, got: {:?}",
+            &records[1][1]
+        );
+    }
+
    #[test]
    fn export_csv_unknown_view_returns_error() {
        let m = two_cat_model();
@@ -1975,6 +2022,55 @@ mod parser_edge_cases {
        assert_eq!(m.model.name, "MyModel");
    }

+    /// Bug improvise-4yc: the `Rule::number` arm used
+    /// `as_str().parse().unwrap_or(0.0)`, so any numeric token the grammar
+    /// admitted but `f64::from_str` rejected would silently become
+    /// Number(0.0). The conversion must surface a CellValue::Error naming the
+    /// offending text instead of fabricating a zero.
+    #[test]
+    fn unparseable_number_token_yields_error_not_zero() {
+        // No token the current grammar admits actually fails f64 parsing
+        // (overflow saturates to inf), so exercise the helper directly with
+        // the forms the grammar could plausibly drift toward admitting.
+        for bad in ["3.14.15", "1.5e", "--5", ""] {
+            let got = super::parse_number_cell(bad);
+            match got {
+                CellValue::Error(msg) => {
+                    assert!(msg.contains(bad) || bad.is_empty(), "error should name {bad:?}: {msg}")
+                }
+                other => panic!("{bad:?} must not produce {other:?}"),
+            }
+        }
+    }
+
+    /// Bug improvise-4yc (grammar routing): malformed numeric forms in a data
+    /// line must never silently load as Number(0.0). The grammar routes them
+    /// either to bare_value (Text) or to a parse error that names the text.
+    #[test]
+    fn malformed_numbers_in_data_never_silently_zero() {
+        // "--5" fails the number rule entirely → bare_value → Text
+        let text = "v2025-04-09\n# T\n## Data\nType=Food = --5\n";
+        let m = parse_md(text).unwrap();
+        assert_eq!(
+            m.model.get_cell(&coord(&[("Type", "Food")])),
+            Some(&CellValue::Text("--5".into()))
+        );
+
+        // "3.14.15" — number matches the "3.14" prefix and commits, the rest
+        // fails → whole-file parse error whose message shows the line
+        let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n";
+        let err = parse_md(text).unwrap_err().to_string();
+        assert!(err.contains("3.14.15"), "error must name the text: {err}");
+
+        // "1e999" is admitted; f64 parsing saturates to +inf, not 0.0
+        let text = "v2025-04-09\n# T\n## Data\nType=Food = 1e999\n";
+        let m = parse_md(text).unwrap();
+        assert_eq!(
+            m.model.get_cell(&coord(&[("Type", "Food")])),
+            Some(&CellValue::Number(f64::INFINITY))
+        );
+    }
+
    #[test]
    fn parse_data_without_value() {
        // Malformed data line: no " = " separator — pest rejects it