Files
improvise/src/model/cell.rs
Edward Langley 737d14a5c0 fix: add depth limit to formula evaluation, propagate errors
Circular or self-referencing formulas now return CellValue::Error
instead of stack overflowing. eval_expr uses Result<f64, String>
internally so errors (circular refs, div/0, missing refs) propagate
immediately through the expression tree via ?. The depth limit (16)
is checked per evaluate_depth call — normal 1-2 level chains are
unaffected.

Also adds CellValue::Error variant for displaying ERR:reason in the
grid, and handles it in format, persistence, and search.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 00:07:58 -07:00

629 lines
21 KiB
Rust

use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use super::symbol::{Symbol, SymbolTable};
/// A cell key is a sorted vector of (category_name, item_name) pairs.
/// Sorted by category name for canonical form.
///
/// `CellKey::new` sorts its input and `CellKey::with` re-sorts after adding a
/// category. The derived `PartialEq`, `Eq` and `Hash` compare the vector
/// element by element, so a key built by writing to the public field directly
/// only equals its canonical form if the caller supplies the pairs already
/// sorted by category name.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CellKey(pub Vec<(String, String)>);
impl CellKey {
    /// Builds a key from `coords`, ordering the pairs by category name so the
    /// same coordinates produce the same key regardless of input order.
    pub fn new(mut coords: Vec<(String, String)>) -> Self {
        coords.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
        Self(coords)
    }

    /// Returns the item stored for `category`, or `None` when the key has no
    /// such category. The first matching pair wins.
    pub fn get(&self, category: &str) -> Option<&str> {
        self.0
            .iter()
            .find_map(|(cat, item)| (cat == category).then(|| item.as_str()))
    }

    /// Returns the key with `category` set to `item`.
    ///
    /// An existing category has its item replaced in place; a new category is
    /// appended and the pairs are re-sorted by category name.
    pub fn with(mut self, category: impl Into<String>, item: impl Into<String>) -> Self {
        let category = category.into();
        let item = item.into();
        match self.0.iter().position(|(existing, _)| *existing == category) {
            Some(idx) => self.0[idx].1 = item,
            None => {
                self.0.push((category, item));
                self.0.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
            }
        }
        self
    }

    /// Returns a copy of the key with every pair for `category` dropped.
    /// The relative order of the remaining pairs is unchanged.
    pub fn without(&self, category: &str) -> Self {
        let mut remaining = Vec::with_capacity(self.0.len());
        for pair in &self.0 {
            if pair.0 != category {
                remaining.push(pair.clone());
            }
        }
        Self(remaining)
    }

    /// Returns `true` when every `(category, item)` in `partial` is present in
    /// this key. An empty `partial` matches every key.
    #[allow(dead_code)]
    pub fn matches_partial(&self, partial: &[(String, String)]) -> bool {
        for (cat, item) in partial {
            if self.get(cat) != Some(item.as_str()) {
                return false;
            }
        }
        true
    }
}
impl std::fmt::Display for CellKey {
    /// Renders the key as `{cat=item, cat=item}` in stored order; an empty key
    /// renders as `{}`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("{")?;
        for (idx, (cat, item)) in self.0.iter().enumerate() {
            if idx > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{cat}={item}")?;
        }
        f.write_str("}")
    }
}
/// The content of a single cell.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum CellValue {
/// A numeric value.
Number(f64),
/// A text value.
Text(String),
/// Evaluation error (circular reference, depth overflow, etc.).
/// The payload is the reason; `Display` renders it as `ERR:` followed by
/// that reason.
Error(String),
}
impl CellValue {
pub fn as_f64(&self) -> Option<f64> {
match self {
CellValue::Number(n) => Some(*n),
_ => None,
}
}
pub fn is_error(&self) -> bool {
matches!(self, CellValue::Error(_))
}
}
impl std::fmt::Display for CellValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CellValue::Number(n) => {
if n.fract() == 0.0 && n.abs() < 1e15 {
write!(f, "{}", *n as i64)
} else {
write!(f, "{n:.4}")
}
}
CellValue::Text(s) => write!(f, "{s}"),
CellValue::Error(msg) => write!(f, "ERR:{msg}"),
}
}
}
/// Interned representation of a CellKey — cheap to hash and compare.
/// Sorted by first element (category Symbol) for canonical form.
///
/// Each pair is `(category, item)` with both names replaced by their `Symbol`.
// NOTE(review): `DataStore::lookup_key` and `DataStore::to_cell_key` map pairs
// one-to-one in the order they appear and never re-sort. Whether
// `SymbolTable::intern_coords` also preserves that order is not visible in this
// file — confirm, since `set` and `get` must produce identical keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InternedKey(pub Vec<(Symbol, Symbol)>);
/// Storage for cell values, addressed by `CellKey`.
///
/// Serialized as a list of (key, value) pairs so CellKey doesn't need
/// to implement the `Serialize`-as-string requirement for JSON object keys.
/// Only the cells are written out; deserialization rebuilds `symbols` and
/// `index` by calling `set` for each pair.
#[derive(Debug, Clone, Default)]
pub struct DataStore {
/// Primary storage — interned keys for O(1) hash/compare.
cells: HashMap<InternedKey, CellValue>,
/// String interner — all category/item names are interned here.
pub symbols: SymbolTable,
/// Secondary index: interned (category, item) → set of interned keys.
/// Used by `matching_values` / `matching_cells` to intersect the key sets
/// of the requested coordinates instead of scanning every cell.
index: HashMap<(Symbol, Symbol), HashSet<InternedKey>>,
}
impl Serialize for DataStore {
    /// Writes the store as a sequence of `(CellKey, CellValue)` pairs, turning
    /// each interned key back into its string form. Pairs are emitted in the
    /// cell map's iteration order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeSeq;
        let mut seq = serializer.serialize_seq(Some(self.cells.len()))?;
        self.cells.iter().try_for_each(|(ikey, value)| {
            seq.serialize_element(&(self.to_cell_key(ikey), value))
        })?;
        seq.end()
    }
}
impl<'de> Deserialize<'de> for DataStore {
    /// Reads a sequence of `(CellKey, CellValue)` pairs and replays each one
    /// through `set`, which rebuilds the symbol table and the secondary index.
    /// If the same key occurs more than once, the last occurrence wins.
    // NOTE(review): keys are stored in the order they appear in the input; they
    // are not passed through `CellKey::new`, so an unsorted key in a hand-edited
    // file stays unsorted — confirm that is intended.
    fn deserialize<D>(d: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let entries = Vec::<(CellKey, CellValue)>::deserialize(d)?;
        let store = entries
            .into_iter()
            .fold(DataStore::default(), |mut acc, (key, value)| {
                acc.set(key, value);
                acc
            });
        Ok(store)
    }
}
impl DataStore {
pub fn new() -> Self {
Self::default()
}
/// Intern a CellKey into an InternedKey.
pub fn intern_key(&mut self, key: &CellKey) -> InternedKey {
InternedKey(self.symbols.intern_coords(&key.0))
}
/// Convert an InternedKey back to a CellKey (string form).
pub fn to_cell_key(&self, ikey: &InternedKey) -> CellKey {
CellKey(
ikey.0
.iter()
.map(|(c, i)| {
(
self.symbols.resolve(*c).to_string(),
self.symbols.resolve(*i).to_string(),
)
})
.collect(),
)
}
pub fn set(&mut self, key: CellKey, value: CellValue) {
let ikey = self.intern_key(&key);
// Update index for each coordinate pair
for pair in &ikey.0 {
self.index.entry(*pair).or_default().insert(ikey.clone());
}
self.cells.insert(ikey, value);
}
pub fn get(&self, key: &CellKey) -> Option<&CellValue> {
let ikey = self.lookup_key(key)?;
self.cells.get(&ikey)
}
/// Look up an InternedKey for a CellKey without interning new symbols.
fn lookup_key(&self, key: &CellKey) -> Option<InternedKey> {
let pairs: Option<Vec<(Symbol, Symbol)>> = key
.0
.iter()
.map(|(c, i)| Some((self.symbols.get(c)?, self.symbols.get(i)?)))
.collect();
pairs.map(InternedKey)
}
/// Iterate over all cells, yielding (CellKey, &CellValue) pairs.
pub fn iter_cells(&self) -> impl Iterator<Item = (CellKey, &CellValue)> {
self.cells.iter().map(|(k, v)| (self.to_cell_key(k), v))
}
pub fn remove(&mut self, key: &CellKey) {
let Some(ikey) = self.lookup_key(key) else {
return;
};
if self.cells.remove(&ikey).is_some() {
for pair in &ikey.0 {
if let Some(set) = self.index.get_mut(pair) {
set.remove(&ikey);
}
}
}
}
/// Values of all cells where every coordinate in `partial` matches.
/// Hot path: avoids allocating CellKey for each result.
pub fn matching_values(&self, partial: &[(String, String)]) -> Vec<&CellValue> {
if partial.is_empty() {
return self.cells.values().collect();
}
// Intern the partial key (lookup only, no new symbols)
let interned_partial: Vec<(Symbol, Symbol)> = partial
.iter()
.filter_map(|(c, i)| Some((self.symbols.get(c)?, self.symbols.get(i)?)))
.collect();
if interned_partial.len() < partial.len() {
return vec![];
}
let mut sets: Vec<&HashSet<InternedKey>> = interned_partial
.iter()
.filter_map(|pair| self.index.get(pair))
.collect();
if sets.len() < interned_partial.len() {
return vec![];
}
sets.sort_by_key(|s| s.len());
let first = sets[0];
let rest = &sets[1..];
first
.iter()
.filter(|ikey| rest.iter().all(|s| s.contains(*ikey)))
.filter_map(|ikey| self.cells.get(ikey))
.collect()
}
/// All cells where every coordinate in `partial` matches.
/// Allocates CellKey strings for each match — use `matching_values`
/// if you only need values.
#[allow(dead_code)]
pub fn matching_cells(&self, partial: &[(String, String)]) -> Vec<(CellKey, &CellValue)> {
if partial.is_empty() {
return self.iter_cells().collect();
}
let interned_partial: Vec<(Symbol, Symbol)> = partial
.iter()
.filter_map(|(c, i)| Some((self.symbols.get(c)?, self.symbols.get(i)?)))
.collect();
if interned_partial.len() < partial.len() {
return vec![];
}
let mut sets: Vec<&HashSet<InternedKey>> = interned_partial
.iter()
.filter_map(|pair| self.index.get(pair))
.collect();
if sets.len() < interned_partial.len() {
return vec![];
}
sets.sort_by_key(|s| s.len());
let first = sets[0];
let rest = &sets[1..];
first
.iter()
.filter(|ikey| rest.iter().all(|s| s.contains(*ikey)))
.filter_map(|ikey| {
let value = self.cells.get(ikey)?;
Some((self.to_cell_key(ikey), value))
})
.collect()
}
}
#[cfg(test)]
mod cell_key {
    use super::CellKey;

    /// Builds a canonical `CellKey` from borrowed (category, item) pairs.
    fn key(pairs: &[(&str, &str)]) -> CellKey {
        let coords: Vec<(String, String)> = pairs
            .iter()
            .map(|&(cat, item)| (cat.to_owned(), item.to_owned()))
            .collect();
        CellKey::new(coords)
    }

    /// Wraps one borrowed pair as the owned partial-key slice element.
    fn owned(cat: &str, item: &str) -> (String, String) {
        (cat.to_owned(), item.to_owned())
    }

    #[test]
    fn coords_are_sorted_by_category_name() {
        let k = key(&[
            ("Region", "East"),
            ("Measure", "Revenue"),
            ("Product", "Shirts"),
        ]);
        let categories: Vec<&str> = k.0.iter().map(|(cat, _)| cat.as_str()).collect();
        assert_eq!(categories, ["Measure", "Product", "Region"]);
    }

    #[test]
    fn get_returns_item_for_known_category() {
        let k = key(&[("Region", "East"), ("Product", "Shirts")]);
        for (cat, expected) in [("Region", "East"), ("Product", "Shirts")] {
            assert_eq!(k.get(cat), Some(expected));
        }
    }

    #[test]
    fn get_returns_none_for_unknown_category() {
        assert!(key(&[("Region", "East")]).get("Measure").is_none());
    }

    #[test]
    fn with_adds_new_coordinate_in_sorted_order() {
        let k = key(&[("Region", "East")]).with("Measure", "Revenue");
        assert_eq!(k.get("Measure"), Some("Revenue"));
        assert_eq!(k.get("Region"), Some("East"));
        let categories: Vec<&str> = k.0.iter().map(|(cat, _)| cat.as_str()).collect();
        assert_eq!(categories, ["Measure", "Region"]);
    }

    #[test]
    fn with_replaces_existing_coordinate() {
        let before = key(&[("Region", "East"), ("Product", "Shirts")]);
        let after = before.with("Region", "West");
        assert_eq!(after.0.len(), 2);
        assert_eq!(after.get("Region"), Some("West"));
    }

    #[test]
    fn without_removes_coordinate() {
        let trimmed = key(&[("Region", "East"), ("Product", "Shirts")]).without("Region");
        assert_eq!(trimmed.0.len(), 1);
        assert_eq!(trimmed.get("Product"), Some("Shirts"));
        assert_eq!(trimmed.get("Region"), None);
    }

    #[test]
    fn without_missing_category_is_noop() {
        let trimmed = key(&[("Region", "East")]).without("Measure");
        assert_eq!(trimmed.0.len(), 1);
    }

    #[test]
    fn matches_partial_full_match() {
        let k = key(&[("Region", "East"), ("Product", "Shirts")]);
        assert!(k.matches_partial(&[owned("Region", "East")]));
    }

    #[test]
    fn matches_partial_empty_matches_all() {
        let k = key(&[("Region", "East"), ("Product", "Shirts")]);
        let nothing: [(String, String); 0] = [];
        assert!(k.matches_partial(&nothing));
    }

    #[test]
    fn matches_partial_wrong_item_no_match() {
        let k = key(&[("Region", "East"), ("Product", "Shirts")]);
        assert!(!k.matches_partial(&[owned("Region", "West")]));
    }

    #[test]
    fn matches_partial_missing_category_no_match() {
        let k = key(&[("Region", "East")]);
        assert!(!k.matches_partial(&[owned("Product", "Shirts")]));
    }

    #[test]
    fn display_format() {
        assert_eq!(format!("{}", key(&[("Region", "East")])), "{Region=East}");
    }
}
#[cfg(test)]
mod data_store {
    use super::{CellKey, CellValue, DataStore};

    /// Builds a canonical `CellKey` from borrowed (category, item) pairs.
    fn key(pairs: &[(&str, &str)]) -> CellKey {
        let coords: Vec<(String, String)> = pairs
            .iter()
            .map(|&(cat, item)| (cat.to_owned(), item.to_owned()))
            .collect();
        CellKey::new(coords)
    }

    #[test]
    fn get_missing_returns_empty() {
        let store = DataStore::new();
        let missing = key(&[("Region", "East")]);
        assert_eq!(store.get(&missing), None);
    }

    #[test]
    fn set_and_get_roundtrip() {
        let k = key(&[("Region", "East"), ("Product", "Shirts")]);
        let mut store = DataStore::new();
        store.set(k.clone(), CellValue::Number(42.0));
        let expected = CellValue::Number(42.0);
        assert_eq!(store.get(&k), Some(&expected));
    }

    #[test]
    fn overwrite_value() {
        let k = key(&[("Region", "East")]);
        let mut store = DataStore::new();
        for n in [1.0, 99.0] {
            store.set(k.clone(), CellValue::Number(n));
        }
        assert_eq!(store.get(&k), Some(&CellValue::Number(99.0)));
    }

    #[test]
    fn remove_evicts_key() {
        let k = key(&[("Region", "East")]);
        let mut store = DataStore::new();
        store.set(k.clone(), CellValue::Number(5.0));
        store.remove(&k);
        assert_eq!(store.iter_cells().count(), 0);
    }

    #[test]
    fn matching_cells_returns_correct_subset() {
        let fixture = [
            ("Revenue", "East", 100.0),
            ("Revenue", "West", 200.0),
            ("Cost", "East", 50.0),
        ];
        let mut store = DataStore::new();
        for (measure, region, amount) in fixture {
            store.set(
                key(&[("Measure", measure), ("Region", region)]),
                CellValue::Number(amount),
            );
        }

        let partial = vec![("Measure".to_string(), "Revenue".to_string())];
        let cells = store.matching_cells(&partial);
        assert_eq!(cells.len(), 2);

        let values: Vec<f64> = cells.iter().filter_map(|(_, v)| v.as_f64()).collect();
        for expected in [100.0, 200.0] {
            assert!(values.contains(&expected));
        }
    }
}
// Property-based tests (proptest) for the CellKey and DataStore invariants:
// canonical ordering of keys, `with`/`without` behaviour, and set/get/remove
// and partial-match semantics of the store.
#[cfg(test)]
mod prop_tests {
use super::{CellKey, CellValue, DataStore};
use proptest::prelude::*;
/// Strategy: map of unique cat→item strings (HashMap guarantees unique keys).
/// Yields 1 to 5 pairs; categories match `[a-f]{1,5}`, items `[a-z]{1,5}`.
fn pairs_map() -> impl Strategy<Value = Vec<(String, String)>> {
prop::collection::hash_map("[a-f]{1,5}", "[a-z]{1,5}", 1..6)
.prop_map(|m| m.into_iter().collect())
}
/// Strategy: finite f64 (no NaN, no infinity).
// NOTE(review): the `is_finite` filter may be redundant if `NORMAL` already
// excludes NaN and infinities — confirm against the proptest documentation.
fn finite_f64() -> impl Strategy<Value = f64> {
prop::num::f64::NORMAL.prop_filter("finite", |f| f.is_finite())
}
proptest! {
// ── CellKey invariants ────────────────────────────────────────────────
/// Pairs are always in ascending category-name order after construction.
#[test]
fn cellkey_always_sorted(pairs in pairs_map()) {
let key = CellKey::new(pairs);
for w in key.0.windows(2) {
prop_assert!(w[0].0 <= w[1].0,
"out of order: {:?} then {:?}", w[0].0, w[1].0);
}
}
/// Reversing the input produces an identical key (order-independence).
#[test]
fn cellkey_order_independent(pairs in pairs_map()) {
let mut rev = pairs.clone();
rev.reverse();
prop_assert_eq!(CellKey::new(pairs), CellKey::new(rev));
}
/// get(cat) finds every pair that was passed to new().
#[test]
fn cellkey_get_retrieves_all_pairs(pairs in pairs_map()) {
let key = CellKey::new(pairs.clone());
for (cat, item) in &pairs {
prop_assert_eq!(key.get(cat), Some(item.as_str()),
"missing {}={}", cat, item);
}
}
/// with(cat, val) — if cat already exists, it is updated in-place.
#[test]
fn cellkey_with_overwrites_existing(
pairs in pairs_map(),
new_item in "[a-z]{1,5}",
) {
let key = CellKey::new(pairs.clone());
// pairs_map always yields at least one pair, so indexing [0] is safe
let cat = pairs[0].0.clone();
let key2 = key.with(cat.clone(), new_item.clone());
prop_assert_eq!(key2.get(&cat), Some(new_item.as_str()));
// length unchanged when cat already exists
prop_assert_eq!(key2.0.len(), pairs.len());
}
/// with(fresh_cat, val) — a brand-new category is inserted and the
/// result is still sorted.
#[test]
fn cellkey_with_adds_new_category(
pairs in pairs_map(),
// g-z is disjoint from the a-f alphabet pairs_map uses for categories,
// so fresh_cat cannot collide with an existing category
fresh_cat in "[g-z]{1,5}",
new_item in "[a-z]{1,5}",
) {
let key = CellKey::new(pairs.clone());
// defensive guard: only run if fresh_cat is truly absent
prop_assume!(!pairs.iter().any(|(c, _)| c == &fresh_cat));
let key2 = key.with(fresh_cat.clone(), new_item.clone());
prop_assert_eq!(key2.get(&fresh_cat), Some(new_item.as_str()));
prop_assert_eq!(key2.0.len(), pairs.len() + 1);
for w in key2.0.windows(2) {
prop_assert!(w[0].0 <= w[1].0, "not sorted after with()");
}
}
/// without(cat) — the removed category is absent; all others survive.
#[test]
fn cellkey_without_removes_and_preserves(pairs in pairs_map()) {
// need at least one surviving pair to check preservation
prop_assume!(pairs.len() >= 2);
let removed_cat = pairs[0].0.clone();
let key = CellKey::new(pairs.clone());
let key2 = key.without(&removed_cat);
prop_assert_eq!(key2.get(&removed_cat), None);
// categories are unique, so every pair after the first must remain
for (cat, item) in pairs.iter().skip(1) {
prop_assert_eq!(key2.get(cat), Some(item.as_str()));
}
}
// ── DataStore invariants ──────────────────────────────────────────────
/// Setting a value and immediately getting it back returns the same value.
#[test]
fn datastore_set_get_roundtrip(pairs in pairs_map(), val in finite_f64()) {
let key = CellKey::new(pairs);
let mut store = DataStore::default();
store.set(key.clone(), CellValue::Number(val));
prop_assert_eq!(store.get(&key), Some(&CellValue::Number(val)));
}
/// Removing after a real value: get returns None (key is evicted).
#[test]
fn datastore_empty_evicts_key(pairs in pairs_map(), val in finite_f64()) {
let key = CellKey::new(pairs);
let mut store = DataStore::default();
store.set(key.clone(), CellValue::Number(val));
store.remove(&key);
prop_assert_eq!(store.get(&key), None);
}
/// The last write to a key wins.
#[test]
fn datastore_last_write_wins(
pairs in pairs_map(),
v1 in finite_f64(),
v2 in finite_f64(),
) {
let key = CellKey::new(pairs);
let mut store = DataStore::default();
store.set(key.clone(), CellValue::Number(v1));
store.set(key.clone(), CellValue::Number(v2));
prop_assert_eq!(store.get(&key), Some(&CellValue::Number(v2)));
}
/// Two keys that differ by one coordinate are fully independent.
#[test]
fn datastore_distinct_keys_independent(
pairs in pairs_map(),
v1 in finite_f64(),
v2 in finite_f64(),
new_item in "[g-z]{1,5}",
) {
// key2 shares all categories with key1 but has a different item in
// the first category, so key1 ≠ key2.
let mut pairs2 = pairs.clone();
let changed_cat = pairs2[0].0.clone();
pairs2[0].1 = new_item.clone();
// items are drawn from a-z, so new_item (g-z) can equal the old item
prop_assume!(pairs[0].1 != new_item); // ensure they truly differ
let key1 = CellKey::new(pairs);
let key2 = CellKey::new(pairs2);
let mut store = DataStore::default();
store.set(key1.clone(), CellValue::Number(v1));
store.set(key2.clone(), CellValue::Number(v2));
prop_assert_eq!(store.get(&key1), Some(&CellValue::Number(v1)),
"key1 corrupted after writing key2 (diff in {})", changed_cat);
prop_assert_eq!(store.get(&key2), Some(&CellValue::Number(v2)));
}
/// Every cell returned by matching_cells actually satisfies the partial key.
#[test]
fn datastore_matching_cells_all_match_partial(
pairs in pairs_map(),
val in finite_f64(),
) {
// with two or more pairs the partial key is a strict subset of the full key
prop_assume!(pairs.len() >= 2);
let key = CellKey::new(pairs.clone());
let mut store = DataStore::default();
store.set(key, CellValue::Number(val));
// partial = first pair only
let partial = vec![pairs[0].clone()];
let results = store.matching_cells(&partial);
for (result_key, _) in &results {
prop_assert!(result_key.matches_partial(&partial),
"returned key {result_key} does not match partial {partial:?}");
}
}
}
}