fix(io): improve persistence robustness and CSV compliance

Improved numeric parsing in Markdown files to surface errors instead of defaulting to 0.0. `export_csv` now uses `csv::Writer` to ensure RFC 4180 compliance (e.g., quoting fields containing commas). `export_csv` now correctly respects the view's number format. Add regression tests for numeric error handling and RFC 4180 compliance. Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
2026-06-09 21:43:13 -07:00
parent 77a5124d16
commit 47ad9e6032
1 changed files with 115 additions and 19 deletions
@@ -106,6 +106,17 @@ fn format_number(n: f64) -> String {
    }
 }
 /// Convert a token matched by the grammar's `number` rule into a cell value.
 ///
 /// The grammar only admits tokens that `f64::from_str` accepts (overflow
 /// saturates to ±inf), but if the two grammars ever drift the mismatch must
 /// surface — never silently store a wrong number.
 fn parse_number_cell(s: &str) -> CellValue {
    s.parse()
        .map(CellValue::Number)
        .unwrap_or_else(|_| CellValue::Error(format!("invalid number: {s}")))
 }
 // ── File I/O ─────────────────────────────────────────────────────────────────
 fn is_gzip(path: &Path) -> bool {
@@ -430,9 +441,7 @@ pub fn parse_md(text: &str) -> Result<Workbook> {
                            .collect::<Result<_>>()?;
                        let value = match value_pair.as_rule() {
-                            Rule::number => {
+                            Rule::number => parse_number_cell(value_pair.as_str()),
                                CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0))
                            }
                            Rule::pipe_quoted => {
                                let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
                                CellValue::Text(unescape_pipe(inner.as_str()))
@@ -586,8 +595,9 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
    let layout = GridLayout::new(&workbook.model, view);
    let model = &workbook.model;
    let (fmt_comma, fmt_decimals) = crate::format::parse_number_format(&view.number_format);
-    let mut out = String::new();
+    let mut wtr = csv::WriterBuilder::new().flexible(true).from_path(path)?;
    // Header row
    let row_header = layout.row_cats.join("/");
@@ -601,31 +611,28 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
    } else {
        format!("{} ({})", row_header, page_label.join(", "))
    };
    let mut header: Vec<String> = Vec::new();
    if !header_prefix.is_empty() {
-        out.push_str(&header_prefix);
+        header.push(header_prefix);
        out.push(',');
    }
-    let col_labels: Vec<String> = (0..layout.col_count())
+    header.extend((0..layout.col_count()).map(|ci| layout.col_label(ci)));
-        .map(|ci| layout.col_label(ci))
+    wtr.write_record(&header)?;
        .collect();
    out.push_str(&col_labels.join(","));
    out.push('\n');
    // Data rows
    for ri in 0..layout.row_count() {
        let mut record: Vec<String> = Vec::new();
        let row_label = layout.row_label(ri);
        if !row_label.is_empty() {
-            out.push_str(&row_label);
+            record.push(row_label);
            out.push(',');
        }
-        let row_values: Vec<String> = (0..layout.col_count())
+        record.extend(
-            .map(|ci| layout.display_text(model, ri, ci, false, 0))
+            (0..layout.col_count())
-            .collect();
+                .map(|ci| layout.display_text(model, ri, ci, fmt_comma, fmt_decimals)),
-        out.push_str(&row_values.join(","));
+        );
-        out.push('\n');
+        wtr.write_record(&record)?;
    }
-    std::fs::write(path, out)?;
+    wtr.flush()?;
    Ok(())
 }
@@ -1195,6 +1202,46 @@ Type=Food = 42
        );
    }
    /// Bug improvise-1cz: export_csv joined labels/values with bare commas
    /// (no RFC 4180 quoting) and ignored the view's number format. With a
    /// comma-grouping format, a value like 1200 renders as "1,200" and a
    /// label like "Smith, Jr." splits into two fields, corrupting every row.
    #[test]
    fn export_csv_quotes_commas_per_rfc4180() {
        let mut m = Workbook::new("T");
        m.add_category("Type").unwrap();
        m.model.category_mut("Type").unwrap().add_item("Smith, Jr.");
        m.add_category("Month").unwrap();
        m.model.category_mut("Month").unwrap().add_item("Jan");
        m.model.set_cell(
            coord(&[("Type", "Smith, Jr."), ("Month", "Jan")]),
            CellValue::Number(1200.0),
        );
        m.active_view_mut().number_format = ",.2f".to_string();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("export.csv");
        super::export_csv(&m, "Default", &path).unwrap();
        let mut rdr = csv::ReaderBuilder::new()
            .has_headers(false)
            .from_path(&path)
            .unwrap();
        let records: Vec<csv::StringRecord> =
            rdr.records().collect::<Result<_, _>>().unwrap();
        // Header + one data row, each with exactly 2 fields (label + value)
        assert_eq!(records.len(), 2, "expected header + 1 row: {records:?}");
        for r in &records {
            assert_eq!(r.len(), 2, "comma split a field: {r:?}");
        }
        assert_eq!(&records[1][0], "Smith, Jr.");
        assert!(
            records[1][1].starts_with("1,200"),
            "expected comma-grouped value, got: {:?}",
            &records[1][1]
        );
    }
    #[test]
    fn export_csv_unknown_view_returns_error() {
        let m = two_cat_model();
@@ -1975,6 +2022,55 @@ mod parser_edge_cases {
        assert_eq!(m.model.name, "MyModel");
    }
    /// Bug improvise-4yc: the `Rule::number` arm used
    /// `as_str().parse().unwrap_or(0.0)`, so any numeric token the grammar
    /// admitted but `f64::from_str` rejected would silently become
    /// Number(0.0). The conversion must surface a CellValue::Error naming the
    /// offending text instead of fabricating a zero.
    #[test]
    fn unparseable_number_token_yields_error_not_zero() {
        // No token the current grammar admits actually fails f64 parsing
        // (overflow saturates to inf), so exercise the helper directly with
        // the forms the grammar could plausibly drift toward admitting.
        for bad in ["3.14.15", "1.5e", "--5", ""] {
            let got = super::parse_number_cell(bad);
            match got {
                CellValue::Error(msg) => {
                    assert!(msg.contains(bad) || bad.is_empty(), "error should name {bad:?}: {msg}")
                }
                other => panic!("{bad:?} must not produce {other:?}"),
            }
        }
    }
    /// Bug improvise-4yc (grammar routing): malformed numeric forms in a data
    /// line must never silently load as Number(0.0). The grammar routes them
    /// either to bare_value (Text) or to a parse error that names the text.
    #[test]
    fn malformed_numbers_in_data_never_silently_zero() {
        // "--5" fails the number rule entirely → bare_value → Text
        let text = "v2025-04-09\n# T\n## Data\nType=Food = --5\n";
        let m = parse_md(text).unwrap();
        assert_eq!(
            m.model.get_cell(&coord(&[("Type", "Food")])),
            Some(&CellValue::Text("--5".into()))
        );
        // "3.14.15" — number matches the "3.14" prefix and commits, the rest
        // fails → whole-file parse error whose message shows the line
        let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n";
        let err = parse_md(text).unwrap_err().to_string();
        assert!(err.contains("3.14.15"), "error must name the text: {err}");
        // "1e999" is admitted; f64 parsing saturates to +inf, not 0.0
        let text = "v2025-04-09\n# T\n## Data\nType=Food = 1e999\n";
        let m = parse_md(text).unwrap();
        assert_eq!(
            m.model.get_cell(&coord(&[("Type", "Food")])),
            Some(&CellValue::Number(f64::INFINITY))
        );
    }
    #[test]
    fn parse_data_without_value() {
        // Malformed data line: no " = " separator — pest rejects it