fix(io): improve persistence robustness and CSV compliance
Improved numeric parsing in Markdown files to surface errors instead of defaulting to 0.0. `export_csv` now uses `csv::Writer` to ensure RFC 4180 compliance (e.g., quoting fields containing commas). `export_csv` now correctly respects the view's number format. Add regression tests for numeric error handling and RFC 4180 compliance. Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
This commit is contained in:
@@ -106,6 +106,17 @@ fn format_number(n: f64) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert a token matched by the grammar's `number` rule into a cell value.
|
||||||
|
///
|
||||||
|
/// The grammar only admits tokens that `f64::from_str` accepts (overflow
|
||||||
|
/// saturates to ±inf), but if the two grammars ever drift the mismatch must
|
||||||
|
/// surface — never silently store a wrong number.
|
||||||
|
fn parse_number_cell(s: &str) -> CellValue {
|
||||||
|
s.parse()
|
||||||
|
.map(CellValue::Number)
|
||||||
|
.unwrap_or_else(|_| CellValue::Error(format!("invalid number: {s}")))
|
||||||
|
}
|
||||||
|
|
||||||
// ── File I/O ─────────────────────────────────────────────────────────────────
|
// ── File I/O ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
fn is_gzip(path: &Path) -> bool {
|
fn is_gzip(path: &Path) -> bool {
|
||||||
@@ -430,9 +441,7 @@ pub fn parse_md(text: &str) -> Result<Workbook> {
|
|||||||
.collect::<Result<_>>()?;
|
.collect::<Result<_>>()?;
|
||||||
|
|
||||||
let value = match value_pair.as_rule() {
|
let value = match value_pair.as_rule() {
|
||||||
Rule::number => {
|
Rule::number => parse_number_cell(value_pair.as_str()),
|
||||||
CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0))
|
|
||||||
}
|
|
||||||
Rule::pipe_quoted => {
|
Rule::pipe_quoted => {
|
||||||
let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
|
let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
|
||||||
CellValue::Text(unescape_pipe(inner.as_str()))
|
CellValue::Text(unescape_pipe(inner.as_str()))
|
||||||
@@ -586,8 +595,9 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
|
|||||||
|
|
||||||
let layout = GridLayout::new(&workbook.model, view);
|
let layout = GridLayout::new(&workbook.model, view);
|
||||||
let model = &workbook.model;
|
let model = &workbook.model;
|
||||||
|
let (fmt_comma, fmt_decimals) = crate::format::parse_number_format(&view.number_format);
|
||||||
|
|
||||||
let mut out = String::new();
|
let mut wtr = csv::WriterBuilder::new().flexible(true).from_path(path)?;
|
||||||
|
|
||||||
// Header row
|
// Header row
|
||||||
let row_header = layout.row_cats.join("/");
|
let row_header = layout.row_cats.join("/");
|
||||||
@@ -601,31 +611,28 @@ pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<(
|
|||||||
} else {
|
} else {
|
||||||
format!("{} ({})", row_header, page_label.join(", "))
|
format!("{} ({})", row_header, page_label.join(", "))
|
||||||
};
|
};
|
||||||
|
let mut header: Vec<String> = Vec::new();
|
||||||
if !header_prefix.is_empty() {
|
if !header_prefix.is_empty() {
|
||||||
out.push_str(&header_prefix);
|
header.push(header_prefix);
|
||||||
out.push(',');
|
|
||||||
}
|
}
|
||||||
let col_labels: Vec<String> = (0..layout.col_count())
|
header.extend((0..layout.col_count()).map(|ci| layout.col_label(ci)));
|
||||||
.map(|ci| layout.col_label(ci))
|
wtr.write_record(&header)?;
|
||||||
.collect();
|
|
||||||
out.push_str(&col_labels.join(","));
|
|
||||||
out.push('\n');
|
|
||||||
|
|
||||||
// Data rows
|
// Data rows
|
||||||
for ri in 0..layout.row_count() {
|
for ri in 0..layout.row_count() {
|
||||||
|
let mut record: Vec<String> = Vec::new();
|
||||||
let row_label = layout.row_label(ri);
|
let row_label = layout.row_label(ri);
|
||||||
if !row_label.is_empty() {
|
if !row_label.is_empty() {
|
||||||
out.push_str(&row_label);
|
record.push(row_label);
|
||||||
out.push(',');
|
|
||||||
}
|
}
|
||||||
let row_values: Vec<String> = (0..layout.col_count())
|
record.extend(
|
||||||
.map(|ci| layout.display_text(model, ri, ci, false, 0))
|
(0..layout.col_count())
|
||||||
.collect();
|
.map(|ci| layout.display_text(model, ri, ci, fmt_comma, fmt_decimals)),
|
||||||
out.push_str(&row_values.join(","));
|
);
|
||||||
out.push('\n');
|
wtr.write_record(&record)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::fs::write(path, out)?;
|
wtr.flush()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1195,6 +1202,46 @@ Type=Food = 42
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Bug improvise-1cz: export_csv joined labels/values with bare commas
|
||||||
|
/// (no RFC 4180 quoting) and ignored the view's number format. With a
|
||||||
|
/// comma-grouping format, a value like 1200 renders as "1,200" and a
|
||||||
|
/// label like "Smith, Jr." splits into two fields, corrupting every row.
|
||||||
|
#[test]
|
||||||
|
fn export_csv_quotes_commas_per_rfc4180() {
|
||||||
|
let mut m = Workbook::new("T");
|
||||||
|
m.add_category("Type").unwrap();
|
||||||
|
m.model.category_mut("Type").unwrap().add_item("Smith, Jr.");
|
||||||
|
m.add_category("Month").unwrap();
|
||||||
|
m.model.category_mut("Month").unwrap().add_item("Jan");
|
||||||
|
m.model.set_cell(
|
||||||
|
coord(&[("Type", "Smith, Jr."), ("Month", "Jan")]),
|
||||||
|
CellValue::Number(1200.0),
|
||||||
|
);
|
||||||
|
m.active_view_mut().number_format = ",.2f".to_string();
|
||||||
|
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let path = dir.path().join("export.csv");
|
||||||
|
super::export_csv(&m, "Default", &path).unwrap();
|
||||||
|
|
||||||
|
let mut rdr = csv::ReaderBuilder::new()
|
||||||
|
.has_headers(false)
|
||||||
|
.from_path(&path)
|
||||||
|
.unwrap();
|
||||||
|
let records: Vec<csv::StringRecord> =
|
||||||
|
rdr.records().collect::<Result<_, _>>().unwrap();
|
||||||
|
// Header + one data row, each with exactly 2 fields (label + value)
|
||||||
|
assert_eq!(records.len(), 2, "expected header + 1 row: {records:?}");
|
||||||
|
for r in &records {
|
||||||
|
assert_eq!(r.len(), 2, "comma split a field: {r:?}");
|
||||||
|
}
|
||||||
|
assert_eq!(&records[1][0], "Smith, Jr.");
|
||||||
|
assert!(
|
||||||
|
records[1][1].starts_with("1,200"),
|
||||||
|
"expected comma-grouped value, got: {:?}",
|
||||||
|
&records[1][1]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn export_csv_unknown_view_returns_error() {
|
fn export_csv_unknown_view_returns_error() {
|
||||||
let m = two_cat_model();
|
let m = two_cat_model();
|
||||||
@@ -1975,6 +2022,55 @@ mod parser_edge_cases {
|
|||||||
assert_eq!(m.model.name, "MyModel");
|
assert_eq!(m.model.name, "MyModel");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Bug improvise-4yc: the `Rule::number` arm used
|
||||||
|
/// `as_str().parse().unwrap_or(0.0)`, so any numeric token the grammar
|
||||||
|
/// admitted but `f64::from_str` rejected would silently become
|
||||||
|
/// Number(0.0). The conversion must surface a CellValue::Error naming the
|
||||||
|
/// offending text instead of fabricating a zero.
|
||||||
|
#[test]
|
||||||
|
fn unparseable_number_token_yields_error_not_zero() {
|
||||||
|
// No token the current grammar admits actually fails f64 parsing
|
||||||
|
// (overflow saturates to inf), so exercise the helper directly with
|
||||||
|
// the forms the grammar could plausibly drift toward admitting.
|
||||||
|
for bad in ["3.14.15", "1.5e", "--5", ""] {
|
||||||
|
let got = super::parse_number_cell(bad);
|
||||||
|
match got {
|
||||||
|
CellValue::Error(msg) => {
|
||||||
|
assert!(msg.contains(bad) || bad.is_empty(), "error should name {bad:?}: {msg}")
|
||||||
|
}
|
||||||
|
other => panic!("{bad:?} must not produce {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bug improvise-4yc (grammar routing): malformed numeric forms in a data
|
||||||
|
/// line must never silently load as Number(0.0). The grammar routes them
|
||||||
|
/// either to bare_value (Text) or to a parse error that names the text.
|
||||||
|
#[test]
|
||||||
|
fn malformed_numbers_in_data_never_silently_zero() {
|
||||||
|
// "--5" fails the number rule entirely → bare_value → Text
|
||||||
|
let text = "v2025-04-09\n# T\n## Data\nType=Food = --5\n";
|
||||||
|
let m = parse_md(text).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
m.model.get_cell(&coord(&[("Type", "Food")])),
|
||||||
|
Some(&CellValue::Text("--5".into()))
|
||||||
|
);
|
||||||
|
|
||||||
|
// "3.14.15" — number matches the "3.14" prefix and commits, the rest
|
||||||
|
// fails → whole-file parse error whose message shows the line
|
||||||
|
let text = "v2025-04-09\n# T\n## Data\nType=Food = 3.14.15\n";
|
||||||
|
let err = parse_md(text).unwrap_err().to_string();
|
||||||
|
assert!(err.contains("3.14.15"), "error must name the text: {err}");
|
||||||
|
|
||||||
|
// "1e999" is admitted; f64 parsing saturates to +inf, not 0.0
|
||||||
|
let text = "v2025-04-09\n# T\n## Data\nType=Food = 1e999\n";
|
||||||
|
let m = parse_md(text).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
m.model.get_cell(&coord(&[("Type", "Food")])),
|
||||||
|
Some(&CellValue::Number(f64::INFINITY))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_data_without_value() {
|
fn parse_data_without_value() {
|
||||||
// Malformed data line: no " = " separator — pest rejects it
|
// Malformed data line: no " = " separator — pest rejects it
|
||||||
|
|||||||
Reference in New Issue
Block a user