fix(io): improve persistence robustness and CSV compliance
Improved numeric parsing in Markdown files to surface errors instead of defaulting to 0.0. `export_csv` now uses `csv::Writer` to ensure RFC 4180 compliance (e.g., quoting fields containing commas). `export_csv` now correctly respects the view's number format. Add regression tests for numeric error handling and RFC 4180 compliance. Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
This commit is contained in:
@@ -106,6 +106,17 @@ fn format_number(n: f64) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a token matched by the grammar's `number` rule into a cell value.
|
||||
///
|
||||
/// The grammar only admits tokens that `f64::from_str` accepts (overflow
|
||||
/// saturates to ±inf), but if the two grammars ever drift the mismatch must
|
||||
/// surface — never silently store a wrong number.
|
||||
fn parse_number_cell(s: &str) -> CellValue {
|
||||
s.parse()
|
||||
.map(CellValue::Number)
|
||||
.unwrap_or_else(|_| CellValue::Error(format!("invalid number: {s}")))
|
||||
}
|
||||
|
||||
// ── File I/O ─────────────────────────────────────────────────────────────────
|
||||
|
||||
fn is_gzip(path: &Path) -> bool {
|
||||
@@ -430,9 +441,7 @@ pub fn parse_md(text: &str) -> Result<Workbook> {
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
let value = match value_pair.as_rule() {
|
||||
Rule::number => {
|
||||
CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0))
|
||||
}
|
||||
Rule::number => parse_number_cell(value_pair.as_str()),
|
||||
Rule::pipe_quoted => {
|
||||
let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
|
||||
CellValue::Text(unescape_pipe(inner.as_str()))
|
||||
@@ -586,8 +595,9 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
|
||||
|
||||
let layout = GridLayout::new(&workbook.model, view);
|
||||
let model = &workbook.model;
|
||||
let (fmt_comma, fmt_decimals) = crate::format::parse_number_format(&view.number_format);
|
||||
|
||||
let mut out = String::new();
|
||||
let mut wtr = csv::WriterBuilder::new().flexible(true).from_path(path)?;
|
||||
|
||||
// Header row
|
||||
let row_header = layout.row_cats.join("/");
|
||||
@@ -601,31 +611,28 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
|
||||
} else {
|
||||
format!("{} ({})", row_header, page_label.join(", "))
|
||||
};
|
||||
let mut header: Vec<String> = Vec::new();
|
||||
if !header_prefix.is_empty() {
|
||||
out.push_str(&header_prefix);
|
||||
out.push(',');
|
||||
header.push(header_prefix);
|
||||
}
|
||||
let col_labels: Vec<String> = (0..layout.col_count())
|
||||
.map(|ci| layout.col_label(ci))
|
||||
.collect();
|
||||
out.push_str(&col_labels.join(","));
|
||||
out.push('\n');
|
||||
header.extend((0..layout.col_count()).map(|ci| layout.col_label(ci)));
|
||||
wtr.write_record(&header)?;
|
||||
|
||||
// Data rows
|
||||
for ri in 0..layout.row_count() {
|
||||
let mut record: Vec<String> = Vec::new();
|
||||
let row_label = layout.row_label(ri);
|
||||
if !row_label.is_empty() {
|
||||
out.push_str(&row_label);
|
||||
out.push(',');
|
||||
record.push(row_label);
|
||||
}
|
||||
let row_values: Vec<String> = (0..layout.col_count())
|
||||
.map(|ci| layout.display_text(model, ri, ci, false, 0))
|
||||
.collect();
|
||||
out.push_str(&row_values.join(","));
|
||||
out.push('\n');
|
||||
record.extend(
|
||||
(0..layout.col_count())
|
||||
.map(|ci| layout.display_text(model, ri, ci, fmt_comma, fmt_decimals)),
|
||||
);
|
||||
wtr.write_record(&record)?;
|
||||
}
|
||||
|
||||
std::fs::write(path, out)?;
|
||||
wtr.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1195,6 +1202,46 @@ Type=Food = 42
|
||||
);
|
||||
}
|
||||
|
||||
/// Bug improvise-1cz: export_csv joined labels/values with bare commas
|
||||
/// (no RFC 4180 quoting) and ignored the view's number format. With a
|
||||
/// comma-grouping format, a value like 1200 renders as "1,200" and a
|
||||
/// label like "Smith, Jr." splits into two fields, corrupting every row.
|
||||
#[test]
|
||||
fn export_csv_quotes_commas_per_rfc4180() {
|
||||
let mut m = Workbook::new("T");
|
||||
m.add_category("Type").unwrap();
|
||||
m.model.category_mut("Type").unwrap().add_item("Smith, Jr.");
|
||||
m.add_category("Month").unwrap();
|
||||
m.model.category_mut("Month").unwrap().add_item("Jan");
|
||||
m.model.set_cell(
|
||||
coord(&[("Type", "Smith, Jr."), ("Month", "Jan")]),
|
||||
CellValue::Number(1200.0),
|
||||
);
|
||||
m.active_view_mut().number_format = ",.2f".to_string();
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let path = dir.path().join("export.csv");
|
||||
super::export_csv(&m, "Default", &path).unwrap();
|
||||
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.from_path(&path)
|
||||
.unwrap();
|
||||
let records: Vec<csv::StringRecord> =
|
||||
rdr.records().collect::<Result<_, _>>().unwrap();
|
||||
// Header + one data row, each with exactly 2 fields (label + value)
|
||||
assert_eq!(records.len(), 2, "expected header + 1 row: {records:?}");
|
||||
for r in &records {
|
||||
assert_eq!(r.len(), 2, "comma split a field: {r:?}");
|
||||
}
|
||||
assert_eq!(&records[1][0], "Smith, Jr.");
|
||||
assert!(
|
||||
records[1][1].starts_with("1,200"),
|
||||
"expected comma-grouped value, got: {:?}",
|
||||
&records[1][1]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn export_csv_unknown_view_returns_error() {
|
||||
let m = two_cat_model();
|
||||
@@ -1975,6 +2022,55 @@ mod parser_edge_cases {
|
||||
assert_eq!(m.model.name, "MyModel");
|
||||
}
|
||||
|
||||
/// Bug improvise-4yc: the `Rule::number` arm used
|
||||
/// `as_str().parse().unwrap_or(0.0)`, so any numeric token the grammar
|
||||
/// admitted but `f64::from_str` rejected would silently become
|
||||
/// Number(0.0). The conversion must surface a CellValue::Error naming the
|
||||
/// offending text instead of fabricating a zero.
|
||||
#[test]
|
||||
fn unparseable_number_token_yields_error_not_zero() {
|
||||
// No token the current grammar admits actually fails f64 parsing
|
||||
// (overflow saturates to inf), so exercise the helper directly with
|
||||
// the forms the grammar could plausibly drift toward admitting.
|
||||
for bad in ["3.14.15", "1.5e", "--5", ""] {
|
||||
let got = super::parse_number_cell(bad);
|
||||
match got {
|
||||
CellValue::Error(msg) => {
|
||||
assert!(msg.contains(bad) || bad.is_empty(), "error should name {bad:?}: {msg}")
|
||||
}
|
||||
other => panic!("{bad:?} must not produce {other:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bug improvise-4yc (grammar routing): malformed numeric forms in a data
|
||||
/// line must never silently load as Number(0.0). The grammar routes them
|
||||
/// either to bare_value (Text) or to a parse error that names the text.
|
||||
#[test]
|
||||
fn malformed_numbers_in_data_never_silently_zero() {
|
||||
// "--5" fails the number rule entirely → bare_value → Text
|
||||
let text = "v2025-04-09\n# T\n## Data\nType=Food = --5\n";
|
||||
let m = parse_md(text).unwrap();
|
||||
assert_eq!(
|
||||
m.model.get_cell(&coord(&[("Type", "Food")])),
|
||||
Some(&CellValue::Text("--5".into()))
|
||||
);
|
||||
|
||||
// "3.14.15" — number matches the "3.14" prefix and commits, the rest
|
||||
// fails → whole-file parse error whose message shows the line
|
||||
let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n";
|
||||
let err = parse_md(text).unwrap_err().to_string();
|
||||
assert!(err.contains("3.14.15"), "error must name the text: {err}");
|
||||
|
||||
// "1e999" is admitted; f64 parsing saturates to +inf, not 0.0
|
||||
let text = "v2025-04-09\n# T\n## Data\nType=Food = 1e999\n";
|
||||
let m = parse_md(text).unwrap();
|
||||
assert_eq!(
|
||||
m.model.get_cell(&coord(&[("Type", "Food")])),
|
||||
Some(&CellValue::Number(f64::INFINITY))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_data_without_value() {
|
||||
// Malformed data line: no " = " separator — pest rejects it
|
||||
|
||||
Reference in New Issue
Block a user