refactor(core): use epsilon for float comparison and IndexSet for stem collection
Use `FLOAT_EQ_EPSILON` for equality/inequality operators and division-by-zero guards in formula evaluation to ensure consistent semantics. Replace `Vec` with `IndexSet` for stem collection in `recompute_formulas` to improve performance from O(n²) to O(n). Add regression tests for epsilon-based comparison and stem collection performance. Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
use indexmap::IndexMap;
|
use indexmap::{IndexMap, IndexSet};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use super::category::{Category, CategoryId};
|
use super::category::{Category, CategoryId};
|
||||||
@@ -10,6 +10,15 @@ use crate::formula::{AggFunc, Formula};
|
|||||||
|
|
||||||
const MAX_CATEGORIES: usize = 12;
|
const MAX_CATEGORIES: usize = 12;
|
||||||
|
|
||||||
|
/// Tolerance for float comparison in formula evaluation. The `=`/`!=`
|
||||||
|
/// operators and the division-by-zero guard share this epsilon so the
|
||||||
|
/// semantics agree: any value that `x = 0` treats as zero is also rejected
|
||||||
|
/// as a divisor (`div/0`). Otherwise `IF(x = 0, a, y / x)` could take the
|
||||||
|
/// else branch's division with an x its own condition called zero.
|
||||||
|
/// 1e-10 is far below typical data magnitudes but absorbs accumulated
|
||||||
|
/// f64 rounding noise from chained formulas.
|
||||||
|
const FLOAT_EQ_EPSILON: f64 = 1e-10;
|
||||||
|
|
||||||
/// Pure-data document model: categories, cells, and formulas.
|
/// Pure-data document model: categories, cells, and formulas.
|
||||||
///
|
///
|
||||||
/// `Model` intentionally does **not** know about views. The view axes and
|
/// `Model` intentionally does **not** know about views. The view axes and
|
||||||
@@ -342,8 +351,9 @@ impl Model {
|
|||||||
.map(|f| (f.target_category.clone(), f.target.clone()))
|
.map(|f| (f.target_category.clone(), f.target.clone()))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// Gather all unique partial keys (stems) for each formula category
|
// Gather all unique partial keys (stems) for each formula category.
|
||||||
let mut stems: Vec<CellKey> = Vec::new();
|
// IndexSet dedupes in O(1) per cell while preserving insertion order.
|
||||||
|
let mut stems: IndexSet<CellKey> = IndexSet::new();
|
||||||
for (target_cat, _) in &formula_cats {
|
for (target_cat, _) in &formula_cats {
|
||||||
for (key, _) in self.data.iter_cells() {
|
for (key, _) in self.data.iter_cells() {
|
||||||
let stem = key.without(target_cat);
|
let stem = key.without(target_cat);
|
||||||
@@ -353,9 +363,7 @@ impl Model {
|
|||||||
for nc in none_cats {
|
for nc in none_cats {
|
||||||
stripped = stripped.without(nc);
|
stripped = stripped.without(nc);
|
||||||
}
|
}
|
||||||
if !stems.contains(&stripped) {
|
stems.insert(stripped);
|
||||||
stems.push(stripped);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -470,7 +478,7 @@ impl Model {
|
|||||||
BinOp::Sub => Ok(lv - rv),
|
BinOp::Sub => Ok(lv - rv),
|
||||||
BinOp::Mul => Ok(lv * rv),
|
BinOp::Mul => Ok(lv * rv),
|
||||||
BinOp::Div => {
|
BinOp::Div => {
|
||||||
if rv == 0.0 {
|
if rv.abs() < FLOAT_EQ_EPSILON {
|
||||||
Err("div/0".into())
|
Err("div/0".into())
|
||||||
} else {
|
} else {
|
||||||
Ok(lv / rv)
|
Ok(lv / rv)
|
||||||
@@ -552,8 +560,8 @@ impl Model {
|
|||||||
let lv = eval_expr_cached(l, context, model, target_category, none_cats)?;
|
let lv = eval_expr_cached(l, context, model, target_category, none_cats)?;
|
||||||
let rv = eval_expr_cached(r, context, model, target_category, none_cats)?;
|
let rv = eval_expr_cached(r, context, model, target_category, none_cats)?;
|
||||||
match op {
|
match op {
|
||||||
BinOp::Eq => Ok((lv - rv).abs() < 1e-10),
|
BinOp::Eq => Ok((lv - rv).abs() < FLOAT_EQ_EPSILON),
|
||||||
BinOp::Ne => Ok((lv - rv).abs() >= 1e-10),
|
BinOp::Ne => Ok((lv - rv).abs() >= FLOAT_EQ_EPSILON),
|
||||||
BinOp::Lt => Ok(lv < rv),
|
BinOp::Lt => Ok(lv < rv),
|
||||||
BinOp::Gt => Ok(lv > rv),
|
BinOp::Gt => Ok(lv > rv),
|
||||||
BinOp::Le => Ok(lv <= rv),
|
BinOp::Le => Ok(lv <= rv),
|
||||||
@@ -671,7 +679,7 @@ impl Model {
|
|||||||
BinOp::Sub => Ok(lv - rv),
|
BinOp::Sub => Ok(lv - rv),
|
||||||
BinOp::Mul => Ok(lv * rv),
|
BinOp::Mul => Ok(lv * rv),
|
||||||
BinOp::Div => {
|
BinOp::Div => {
|
||||||
if rv == 0.0 {
|
if rv.abs() < FLOAT_EQ_EPSILON {
|
||||||
Err("div/0".into())
|
Err("div/0".into())
|
||||||
} else {
|
} else {
|
||||||
Ok(lv / rv)
|
Ok(lv / rv)
|
||||||
@@ -746,8 +754,8 @@ impl Model {
|
|||||||
let lv = eval_expr(l, context, model, target_category, depth)?;
|
let lv = eval_expr(l, context, model, target_category, depth)?;
|
||||||
let rv = eval_expr(r, context, model, target_category, depth)?;
|
let rv = eval_expr(r, context, model, target_category, depth)?;
|
||||||
match op {
|
match op {
|
||||||
BinOp::Eq => Ok((lv - rv).abs() < 1e-10),
|
BinOp::Eq => Ok((lv - rv).abs() < FLOAT_EQ_EPSILON),
|
||||||
BinOp::Ne => Ok((lv - rv).abs() >= 1e-10),
|
BinOp::Ne => Ok((lv - rv).abs() >= FLOAT_EQ_EPSILON),
|
||||||
BinOp::Lt => Ok(lv < rv),
|
BinOp::Lt => Ok(lv < rv),
|
||||||
BinOp::Gt => Ok(lv > rv),
|
BinOp::Gt => Ok(lv > rv),
|
||||||
BinOp::Le => Ok(lv <= rv),
|
BinOp::Le => Ok(lv <= rv),
|
||||||
@@ -1534,6 +1542,113 @@ mod formula_tests {
|
|||||||
Some(CellValue::Number(2.0))
|
Some(CellValue::Number(2.0))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Bug (improvise-0bf): `=`/`!=` compare with a 1e-10 epsilon, but the
|
||||||
|
/// division-by-zero guard checked `rv == 0.0` exactly. With X = 1e-11
|
||||||
|
/// the comparison `X = 0` is true, yet `100 / X` happily divided —
|
||||||
|
/// incoherent semantics for `IF(X = 0, a, y / X)`. Any value the
|
||||||
|
/// equality operator treats as zero must also trigger div/0.
|
||||||
|
/// This exercises the direct eval path (`eval_expr` in
|
||||||
|
/// `eval_formula_depth`).
|
||||||
|
#[test]
|
||||||
|
fn near_zero_divisor_consistent_with_equality_epsilon() {
|
||||||
|
let mut m = Model::new("Test");
|
||||||
|
m.add_category("_Measure").unwrap();
|
||||||
|
if let Some(cat) = m.category_mut("_Measure") {
|
||||||
|
cat.add_item("X");
|
||||||
|
cat.add_item("IsZero");
|
||||||
|
cat.add_item("Ratio");
|
||||||
|
}
|
||||||
|
m.set_cell(coord(&[("_Measure", "X")]), CellValue::Number(1e-11));
|
||||||
|
m.add_formula(parse_formula("IsZero = IF(X = 0, 1, 0)", "_Measure").unwrap());
|
||||||
|
m.add_formula(parse_formula("Ratio = 100 / X", "_Measure").unwrap());
|
||||||
|
// Equality says X is zero…
|
||||||
|
assert_eq!(
|
||||||
|
m.evaluate(&coord(&[("_Measure", "IsZero")])),
|
||||||
|
Some(CellValue::Number(1.0))
|
||||||
|
);
|
||||||
|
// …so dividing by X must be div/0, not 1e13.
|
||||||
|
// Bug: returns Number(1e13) — guard uses exact == 0.0.
|
||||||
|
assert_eq!(
|
||||||
|
m.evaluate(&coord(&[("_Measure", "Ratio")])),
|
||||||
|
Some(CellValue::Error("div/0".into()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Same bug (improvise-0bf), fixed-point/cached eval path
|
||||||
|
/// (`eval_expr_cached` in `eval_formula_with_cache`): its division
|
||||||
|
/// guard also used exact `== 0.0` while `=` used the epsilon.
|
||||||
|
#[test]
|
||||||
|
fn near_zero_divisor_consistent_in_cached_eval_path() {
|
||||||
|
let mut m = Model::new("Test");
|
||||||
|
m.add_category("_Measure").unwrap();
|
||||||
|
m.add_category("Region").unwrap();
|
||||||
|
if let Some(cat) = m.category_mut("_Measure") {
|
||||||
|
cat.add_item("X");
|
||||||
|
cat.add_item("IsZero");
|
||||||
|
cat.add_item("Ratio");
|
||||||
|
}
|
||||||
|
if let Some(cat) = m.category_mut("Region") {
|
||||||
|
cat.add_item("East");
|
||||||
|
}
|
||||||
|
m.set_cell(
|
||||||
|
coord(&[("_Measure", "X"), ("Region", "East")]),
|
||||||
|
CellValue::Number(1e-11),
|
||||||
|
);
|
||||||
|
m.add_formula(parse_formula("IsZero = IF(X = 0, 1, 0)", "_Measure").unwrap());
|
||||||
|
m.add_formula(parse_formula("Ratio = 100 / X", "_Measure").unwrap());
|
||||||
|
let none_cats = vec!["Region".to_string()];
|
||||||
|
m.recompute_formulas(&none_cats);
|
||||||
|
// Equality says X is zero…
|
||||||
|
assert_eq!(
|
||||||
|
m.evaluate_aggregated(&coord(&[("_Measure", "IsZero")]), &none_cats),
|
||||||
|
Some(CellValue::Number(1.0))
|
||||||
|
);
|
||||||
|
// …so dividing by X must be div/0 in the cached path too.
|
||||||
|
assert_eq!(
|
||||||
|
m.evaluate_aggregated(&coord(&[("_Measure", "Ratio")]), &none_cats),
|
||||||
|
Some(CellValue::Error("div/0".into()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Regression guard for improvise-6os: stem collection in
|
||||||
|
/// `recompute_formulas` deduplicated with `Vec::contains` (O(n²) over
|
||||||
|
/// all data cells per formula category). Replaced with an `IndexSet` —
|
||||||
|
/// a pure perf change. This pins the observable contract: recomputed
|
||||||
|
/// values for a multi-stem model (chained formulas, duplicate stems
|
||||||
|
/// from multiple measures per stem) are unchanged and stable across
|
||||||
|
/// repeated recomputes.
|
||||||
|
#[test]
|
||||||
|
fn recompute_formulas_multi_stem_values_stable() {
|
||||||
|
let mut m = revenue_cost_model();
|
||||||
|
m.add_formula(parse_formula("Profit = Revenue - Cost", "_Measure").unwrap());
|
||||||
|
m.add_formula(parse_formula("Margin = Profit / Revenue", "_Measure").unwrap());
|
||||||
|
m.recompute_formulas(&[]);
|
||||||
|
|
||||||
|
let cases = [
|
||||||
|
(("Profit", "East"), 400.0),
|
||||||
|
(("Profit", "West"), 300.0),
|
||||||
|
(("Margin", "East"), 0.4),
|
||||||
|
(("Margin", "West"), 0.375),
|
||||||
|
];
|
||||||
|
for ((target, region), expected) in cases {
|
||||||
|
let key = coord(&[("_Measure", target), ("Region", region)]);
|
||||||
|
let val = m
|
||||||
|
.formula_cache
|
||||||
|
.get(&key)
|
||||||
|
.and_then(|v| v.as_f64())
|
||||||
|
.unwrap_or_else(|| panic!("missing cache entry for {target}/{region}"));
|
||||||
|
assert!(
|
||||||
|
approx_eq(val, expected),
|
||||||
|
"{target}/{region}: expected {expected}, got {val}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recomputing again reproduces the exact same cache (determinism).
|
||||||
|
let snapshot = m.formula_cache.clone();
|
||||||
|
m.recompute_formulas(&[]);
|
||||||
|
assert_eq!(m.formula_cache, snapshot);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
Reference in New Issue
Block a user