fix(io): improve persistence robustness and CSV compliance

Improved numeric parsing in Markdown files to surface errors instead of
defaulting to 0.0.

`export_csv` now uses `csv::Writer` to ensure RFC 4180 compliance (e.g.,
quoting fields containing commas).

`export_csv` now correctly respects the view's number format.

Add regression tests for numeric error handling and RFC 4180 compliance.

Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
This commit is contained in:
Edward Langley
2026-06-09 21:43:13 -07:00
parent 77a5124d16
commit 47ad9e6032
+115 -19
View File
@@ -106,6 +106,17 @@ fn format_number(n: f64) -> String {
}
}
/// Convert a token matched by the grammar's `number` rule into a cell value.
///
/// The grammar only admits tokens that `f64::from_str` accepts (overflow
/// saturates to ±inf), but if the two grammars ever drift the mismatch must
/// surface — never silently store a wrong number.
fn parse_number_cell(s: &str) -> CellValue {
s.parse()
.map(CellValue::Number)
.unwrap_or_else(|_| CellValue::Error(format!("invalid number: {s}")))
}
// ── File I/O ─────────────────────────────────────────────────────────────────
fn is_gzip(path: &Path) -> bool {
@@ -430,9 +441,7 @@ pub fn parse_md(text: &str) -> Result<Workbook> {
.collect::<Result<_>>()?;
let value = match value_pair.as_rule() {
Rule::number => {
CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0))
}
Rule::number => parse_number_cell(value_pair.as_str()),
Rule::pipe_quoted => {
let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
CellValue::Text(unescape_pipe(inner.as_str()))
@@ -586,8 +595,9 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
let layout = GridLayout::new(&workbook.model, view);
let model = &workbook.model;
let (fmt_comma, fmt_decimals) = crate::format::parse_number_format(&view.number_format);
let mut out = String::new();
let mut wtr = csv::WriterBuilder::new().flexible(true).from_path(path)?;
// Header row
let row_header = layout.row_cats.join("/");
@@ -601,31 +611,28 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
} else {
format!("{} ({})", row_header, page_label.join(", "))
};
let mut header: Vec<String> = Vec::new();
if !header_prefix.is_empty() {
out.push_str(&header_prefix);
out.push(',');
header.push(header_prefix);
}
let col_labels: Vec<String> = (0..layout.col_count())
.map(|ci| layout.col_label(ci))
.collect();
out.push_str(&col_labels.join(","));
out.push('\n');
header.extend((0..layout.col_count()).map(|ci| layout.col_label(ci)));
wtr.write_record(&header)?;
// Data rows
for ri in 0..layout.row_count() {
let mut record: Vec<String> = Vec::new();
let row_label = layout.row_label(ri);
if !row_label.is_empty() {
out.push_str(&row_label);
out.push(',');
record.push(row_label);
}
let row_values: Vec<String> = (0..layout.col_count())
.map(|ci| layout.display_text(model, ri, ci, false, 0))
.collect();
out.push_str(&row_values.join(","));
out.push('\n');
record.extend(
(0..layout.col_count())
.map(|ci| layout.display_text(model, ri, ci, fmt_comma, fmt_decimals)),
);
wtr.write_record(&record)?;
}
std::fs::write(path, out)?;
wtr.flush()?;
Ok(())
}
@@ -1195,6 +1202,46 @@ Type=Food = 42
);
}
/// Bug improvise-1cz: export_csv joined labels/values with bare commas
/// (no RFC 4180 quoting) and ignored the view's number format. With a
/// comma-grouping format, a value like 1200 renders as "1,200" and a
/// label like "Smith, Jr." splits into two fields, corrupting every row.
#[test]
fn export_csv_quotes_commas_per_rfc4180() {
let mut m = Workbook::new("T");
m.add_category("Type").unwrap();
m.model.category_mut("Type").unwrap().add_item("Smith, Jr.");
m.add_category("Month").unwrap();
m.model.category_mut("Month").unwrap().add_item("Jan");
m.model.set_cell(
coord(&[("Type", "Smith, Jr."), ("Month", "Jan")]),
CellValue::Number(1200.0),
);
m.active_view_mut().number_format = ",.2f".to_string();
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("export.csv");
super::export_csv(&m, "Default", &path).unwrap();
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_path(&path)
.unwrap();
let records: Vec<csv::StringRecord> =
rdr.records().collect::<Result<_, _>>().unwrap();
// Header + one data row, each with exactly 2 fields (label + value)
assert_eq!(records.len(), 2, "expected header + 1 row: {records:?}");
for r in &records {
assert_eq!(r.len(), 2, "comma split a field: {r:?}");
}
assert_eq!(&records[1][0], "Smith, Jr.");
assert!(
records[1][1].starts_with("1,200"),
"expected comma-grouped value, got: {:?}",
&records[1][1]
);
}
#[test]
fn export_csv_unknown_view_returns_error() {
let m = two_cat_model();
@@ -1975,6 +2022,55 @@ mod parser_edge_cases {
assert_eq!(m.model.name, "MyModel");
}
/// Bug improvise-4yc: the `Rule::number` arm used
/// `as_str().parse().unwrap_or(0.0)`, so any numeric token the grammar
/// admitted but `f64::from_str` rejected would silently become
/// Number(0.0). The conversion must surface a CellValue::Error naming the
/// offending text instead of fabricating a zero.
#[test]
fn unparseable_number_token_yields_error_not_zero() {
// No token the current grammar admits actually fails f64 parsing
// (overflow saturates to inf), so exercise the helper directly with
// the forms the grammar could plausibly drift toward admitting.
for bad in ["3.14.15", "1.5e", "--5", ""] {
let got = super::parse_number_cell(bad);
match got {
CellValue::Error(msg) => {
assert!(msg.contains(bad) || bad.is_empty(), "error should name {bad:?}: {msg}")
}
other => panic!("{bad:?} must not produce {other:?}"),
}
}
}
/// Bug improvise-4yc (grammar routing): malformed numeric forms in a data
/// line must never silently load as Number(0.0). The grammar routes them
/// either to bare_value (Text) or to a parse error that names the text.
#[test]
fn malformed_numbers_in_data_never_silently_zero() {
// "--5" fails the number rule entirely → bare_value → Text
let text = "v2025-04-09\n# T\n## Data\nType=Food = --5\n";
let m = parse_md(text).unwrap();
assert_eq!(
m.model.get_cell(&coord(&[("Type", "Food")])),
Some(&CellValue::Text("--5".into()))
);
// "3.14.15" — number matches the "3.14" prefix and commits, the rest
// fails → whole-file parse error whose message shows the line
let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n";
let err = parse_md(text).unwrap_err().to_string();
assert!(err.contains("3.14.15"), "error must name the text: {err}");
// "1e999" is admitted; f64 parsing saturates to +inf, not 0.0
let text = "v2025-04-09\n# T\n## Data\nType=Food = 1e999\n";
let m = parse_md(text).unwrap();
assert_eq!(
m.model.get_cell(&coord(&[("Type", "Food")])),
Some(&CellValue::Number(f64::INFINITY))
);
}
#[test]
fn parse_data_without_value() {
// Malformed data line: no " = " separator — pest rejects it