Files
improvise/src/persistence/mod.rs
Ed L 1345142fe0 fix: make .improv parser order-independent via two-pass approach
Root cause: set_axis silently ignores unregistered categories, so a
view section appearing before its categories would produce wrong axis
assignments when on_category_added later ran and assigned defaults.

Fix: collect all raw data in pass 1, then build the model in the
correct dependency order in pass 2 (categories → views → data/formulas).
The file can now list sections in any order.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 11:37:40 -07:00

681 lines
27 KiB
Rust

use std::io::{Read, Write, BufReader, BufWriter};
use std::path::Path;
use anyhow::{Context, Result};
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use crate::model::Model;
use crate::model::cell::{CellKey, CellValue};
use crate::model::category::Group;
use crate::view::{Axis, GridLayout};
use crate::formula::parse_formula;
/// Write `model` to `path` in the markdown `.improv` text format.
///
/// When the path (as UTF-8) ends in `.gz` the text is gzip-compressed while
/// being written; otherwise it is written as plain text. A path that is not
/// valid UTF-8 is always written uncompressed.
///
/// # Errors
///
/// Returns an error if the file cannot be created or written, or if the
/// compressed stream cannot be finished and flushed.
pub fn save(model: &Model, path: &Path) -> Result<()> {
    let text = format_md(model);
    let gzipped = path.to_str().map(|s| s.ends_with(".gz")).unwrap_or(false);
    if gzipped {
        let file = std::fs::File::create(path)
            .with_context(|| format!("Cannot create {}", path.display()))?;
        let mut encoder = GzEncoder::new(BufWriter::new(file), Compression::default());
        encoder
            .write_all(text.as_bytes())
            .with_context(|| format!("Cannot write {}", path.display()))?;
        // `finish` hands back the inner `BufWriter`. Flush it explicitly:
        // dropping a `BufWriter` discards any error from its final write, which
        // would let a truncated archive be reported as a successful save.
        encoder
            .finish()
            .and_then(|mut writer| writer.flush())
            .with_context(|| format!("Cannot write {}", path.display()))?;
    } else {
        std::fs::write(path, &text)
            .with_context(|| format!("Cannot write {}", path.display()))?;
    }
    Ok(())
}
pub fn load(path: &Path) -> Result<Model> {
let file = std::fs::File::open(path)
.with_context(|| format!("Cannot open {}", path.display()))?;
let text = if path.to_str().map(|s| s.ends_with(".gz")).unwrap_or(false) {
let mut decoder = GzDecoder::new(BufReader::new(file));
let mut s = String::new();
decoder.read_to_string(&mut s)?;
s
} else {
let mut s = String::new();
BufReader::new(file).read_to_string(&mut s)?;
s
};
if text.trim_start().starts_with('{') {
serde_json::from_str(&text).context("Failed to deserialize model")
} else {
parse_md(&text)
}
}
/// Derive the autosave location for a model stored at `path`.
///
/// The result is a sibling of `path` named `.<file name>.autosave`. When
/// `path` has no file name, or the file name is not valid UTF-8, the name
/// `model` is used in its place.
pub fn autosave_path(path: &Path) -> std::path::PathBuf {
    let stem = match path.file_name().and_then(|n| n.to_str()) {
        Some(name) => name,
        None => "model",
    };
    let hidden_name = format!(".{stem}.autosave");

    let mut sibling = path.to_path_buf();
    sibling.set_file_name(hidden_name);
    sibling
}
/// Render `model` as `.improv` markdown text.
///
/// Sections are written in a fixed order: the `# <name>` title, one
/// `## Category:` section per category (items first, then groups that have a
/// parent), an optional `## Formulas` section, an optional `## Data` section
/// and finally one `## View:` section per view. Data cells and the
/// hidden/collapsed lists are sorted so the output is stable between saves.
pub fn format_md(model: &Model) -> String {
    use std::fmt::Write;

    let mut out = String::new();
    writeln!(out, "# {}", model.name).unwrap();

    // Categories: `- item` / `- item [group]`, then `> group [parent]`.
    for category in model.categories.values() {
        writeln!(out, "\n## Category: {}", category.name).unwrap();
        for item in category.items.values() {
            if let Some(group) = &item.group {
                writeln!(out, "- {} [{}]", item.name, group).unwrap();
            } else {
                writeln!(out, "- {}", item.name).unwrap();
            }
        }
        // Only groups with a parent need a line of their own; parentless
        // groups are implied by the `[group]` suffix on their items.
        for group in &category.groups {
            if let Some(parent) = &group.parent {
                writeln!(out, "> {} [{}]", group.name, parent).unwrap();
            }
        }
    }

    // Formulas: `- <raw formula> [<target category>]`.
    let formulas = model.formulas();
    if !formulas.is_empty() {
        writeln!(out, "\n## Formulas").unwrap();
        for formula in formulas {
            writeln!(out, "- {} [{}]", formula.raw, formula.target_category).unwrap();
        }
    }

    // Data: render each coordinate string once, then order rows by it.
    let mut rows: Vec<_> = model
        .data
        .cells()
        .iter()
        .map(|(key, value)| (coord_str(key), value))
        .collect();
    rows.sort_by(|a, b| a.0.cmp(&b.0));
    if !rows.is_empty() {
        writeln!(out, "\n## Data").unwrap();
        for (coords, value) in rows {
            // Text is quoted so the parser can tell it apart from numbers.
            let rendered = match value {
                CellValue::Number(_) => value.to_string(),
                CellValue::Text(s) => format!("\"{}\"", s),
            };
            writeln!(out, "{} = {}", coords, rendered).unwrap();
        }
    }

    // Views: axis assignments, number format, hidden items, collapsed groups.
    for (view_name, view) in &model.views {
        let is_active = view_name == &model.active_view;
        let marker = if is_active { " (active)" } else { "" };
        writeln!(out, "\n## View: {}{}", view.name, marker).unwrap();

        for (cat, axis) in &view.category_axes {
            let placement = match axis {
                Axis::Row => String::from("row"),
                Axis::Column => String::from("column"),
                Axis::Page => match view.page_selections.get(cat) {
                    Some(sel) => format!("page, {}", sel),
                    None => String::from("page"),
                },
            };
            writeln!(out, "{}: {}", cat, placement).unwrap();
        }

        if !view.number_format.is_empty() {
            writeln!(out, "format: {}", view.number_format).unwrap();
        }

        // Hidden items, flattened to (category, item) pairs and sorted.
        let mut hidden: Vec<(&str, &str)> = Vec::new();
        for (cat, items) in view.hidden_items.iter() {
            for item in items.iter() {
                hidden.push((cat.as_str(), item.as_str()));
            }
        }
        hidden.sort();
        for (cat, item) in hidden {
            writeln!(out, "hidden: {}/{}", cat, item).unwrap();
        }

        // Collapsed groups, flattened to (category, group) pairs and sorted.
        let mut collapsed: Vec<(&str, &str)> = Vec::new();
        for (cat, groups) in view.collapsed_groups.iter() {
            for group in groups.iter() {
                collapsed.push((cat.as_str(), group.as_str()));
            }
        }
        collapsed.sort();
        for (cat, group) in collapsed {
            writeln!(out, "collapsed: {}/{}", cat, group).unwrap();
        }
    }

    out
}
/// Parse the markdown `.improv` format into a [`Model`].
///
/// Uses a two-pass approach so the file is order-independent:
///
/// 1. every line is classified by the section it appears in and stored in
///    plain intermediate records;
/// 2. the model is built from those records in dependency order —
///    categories, then views, then formulas and data — so that views are only
///    configured once every category has been registered.
///
/// Lines that do not match the expected shape for their section are skipped.
/// An unrecognised `## ` heading starts an ignored section: its body is
/// dropped rather than being attributed to whichever section preceded it.
///
/// # Errors
///
/// Returns an error when the `# Name` title line is missing, when a category
/// cannot be added to the model, or when a formula fails to parse.
pub fn parse_md(text: &str) -> Result<Model> {
    // ── Intermediate types ────────────────────────────────────────────────────
    /// Raw contents of one `## Category:` section.
    struct PCategory {
        name: String,
        items: Vec<(String, Option<String>)>, // (name, group)
        group_parents: Vec<(String, String)>, // (group, parent)
    }
    /// Raw contents of one `## View:` section.
    struct PView {
        name: String,
        is_active: bool,
        axes: Vec<(String, Axis)>,
        page_selections: Vec<(String, String)>,
        format: String,
        hidden: Vec<(String, String)>,
        collapsed: Vec<(String, String)>,
    }

    // ── Pass 1: collect ───────────────────────────────────────────────────────
    #[derive(PartialEq)]
    enum Section { None, Category, Formulas, Data, View }

    let mut model_name: Option<String> = None;
    let mut categories: Vec<PCategory> = Vec::new();
    let mut formulas: Vec<(String, String)> = Vec::new(); // (raw, category)
    let mut data: Vec<(CellKey, CellValue)> = Vec::new();
    let mut views: Vec<PView> = Vec::new();
    let mut section = Section::None;

    for line in text.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }

        // Title line. A `## ` heading can never match the `# ` prefix because
        // its second character is `#`, not a space.
        if let Some(title) = trimmed.strip_prefix("# ") {
            model_name = Some(title.trim().to_string());
            continue;
        }

        // Section headings.
        if let Some(rest) = trimmed.strip_prefix("## Category: ") {
            categories.push(PCategory {
                name: rest.trim().to_string(),
                items: Vec::new(),
                group_parents: Vec::new(),
            });
            section = Section::Category;
            continue;
        }
        if trimmed == "## Formulas" {
            section = Section::Formulas;
            continue;
        }
        if trimmed == "## Data" {
            section = Section::Data;
            continue;
        }
        if let Some(rest) = trimmed.strip_prefix("## View: ") {
            let (name, is_active) = match rest.trim().strip_suffix(" (active)") {
                Some(n) => (n.trim().to_string(), true),
                None => (rest.trim().to_string(), false),
            };
            views.push(PView {
                name,
                is_active,
                axes: Vec::new(),
                page_selections: Vec::new(),
                format: String::new(),
                hidden: Vec::new(),
                collapsed: Vec::new(),
            });
            section = Section::View;
            continue;
        }
        if trimmed.starts_with("## ") {
            // Unknown heading: leave the current section so the lines that
            // follow are ignored instead of leaking into the previous section.
            section = Section::None;
            continue;
        }

        // Section bodies.
        match section {
            Section::Category => {
                let Some(cat) = categories.last_mut() else { continue };
                if let Some(rest) = trimmed.strip_prefix("- ") {
                    // `- item` or `- item [group]`
                    let (name, group) = parse_bracketed(rest);
                    cat.items.push((name.to_string(), group.map(str::to_string)));
                } else if let Some(rest) = trimmed.strip_prefix("> ") {
                    // `> group [parent]`; a group line without a parent carries no information.
                    let (group, parent) = parse_bracketed(rest);
                    if let Some(p) = parent {
                        cat.group_parents.push((group.to_string(), p.to_string()));
                    }
                }
            }
            Section::Formulas => {
                // `- <raw formula> [<target category>]`; the category is required.
                if let Some(rest) = trimmed.strip_prefix("- ") {
                    let (raw, cat) = parse_bracketed(rest);
                    if let Some(c) = cat {
                        formulas.push((raw.to_string(), c.to_string()));
                    }
                }
            }
            Section::Data => {
                // `Cat=Item, Cat=Item = value` — the first " = " separates the
                // coordinates from the value.
                let Some(sep) = trimmed.find(" = ") else { continue };
                let coords: Vec<(String, String)> = trimmed[..sep]
                    .split(", ")
                    .filter_map(|p| {
                        let (c, i) = p.split_once('=')?;
                        Some((c.trim().to_string(), i.trim().to_string()))
                    })
                    .collect();
                if coords.is_empty() {
                    continue;
                }
                let vs = trimmed[sep + 3..].trim();
                // Quoted → text; parses as f64 → number; anything else → text verbatim.
                let value = if let Some(s) = vs.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
                    CellValue::Text(s.to_string())
                } else if let Ok(n) = vs.parse::<f64>() {
                    CellValue::Number(n)
                } else {
                    CellValue::Text(vs.to_string())
                };
                data.push((CellKey::new(coords), value));
            }
            Section::View => {
                let Some(view) = views.last_mut() else { continue };
                // The keyword lines are checked before the generic
                // `<category>: <axis>` form so they are not mistaken for it.
                if let Some(fmt) = trimmed.strip_prefix("format: ") {
                    view.format = fmt.trim().to_string();
                } else if let Some(rest) = trimmed.strip_prefix("hidden: ") {
                    if let Some((c, i)) = rest.trim().split_once('/') {
                        view.hidden.push((c.trim().to_string(), i.trim().to_string()));
                    }
                } else if let Some(rest) = trimmed.strip_prefix("collapsed: ") {
                    if let Some((c, g)) = rest.trim().split_once('/') {
                        view.collapsed.push((c.trim().to_string(), g.trim().to_string()));
                    }
                } else if let Some(colon) = trimmed.find(": ") {
                    let cat = trimmed[..colon].trim();
                    let rest = trimmed[colon + 2..].trim();
                    if let Some(sel_rest) = rest.strip_prefix("page") {
                        // `page` optionally followed by `, <selected item>`.
                        view.axes.push((cat.to_string(), Axis::Page));
                        if let Some(sel) = sel_rest.strip_prefix(", ") {
                            view.page_selections.push((cat.to_string(), sel.trim().to_string()));
                        }
                    } else {
                        let axis = match rest {
                            "row" => Axis::Row,
                            "column" => Axis::Column,
                            _ => continue,
                        };
                        view.axes.push((cat.to_string(), axis));
                    }
                }
            }
            Section::None => {}
        }
    }

    // ── Pass 2: build ─────────────────────────────────────────────────────────
    let name = model_name.ok_or_else(|| anyhow::anyhow!("Missing model title (# Name)"))?;
    let mut m = Model::new(&name);

    // Categories first — registers them with all existing views via on_category_added
    for pc in &categories {
        m.add_category(&pc.name)?;
        let cat = m.category_mut(&pc.name).expect("category was just added");
        for (item_name, group) in &pc.items {
            match group {
                Some(g) => {
                    cat.add_item_in_group(item_name, g);
                    // Make sure the group itself is recorded exactly once.
                    if !cat.groups.iter().any(|e| &e.name == g) {
                        cat.add_group(Group::new(g));
                    }
                }
                None => {
                    cat.add_item(item_name);
                }
            }
        }
        for (group_name, parent) in &pc.group_parents {
            match cat.groups.iter_mut().find(|g| &g.name == group_name) {
                Some(g) => g.parent = Some(parent.clone()),
                None => cat.add_group(Group::new(group_name).with_parent(parent)),
            }
        }
    }

    // Views — all categories are now registered, so set_axis works correctly
    let mut active_view = String::new();
    for pv in &views {
        if pv.is_active {
            active_view = pv.name.clone();
        }
        if !m.views.contains_key(&pv.name) {
            m.create_view(&pv.name);
        }
        let view = m.views.get_mut(&pv.name).unwrap();
        for (cat, axis) in &pv.axes {
            view.set_axis(cat, *axis);
        }
        for (cat, sel) in &pv.page_selections {
            view.set_page_selection(cat, sel);
        }
        if !pv.format.is_empty() {
            view.number_format = pv.format.clone();
        }
        for (cat, item) in &pv.hidden {
            view.hide_item(cat, item);
        }
        for (cat, grp) in &pv.collapsed {
            view.toggle_group_collapse(cat, grp);
        }
    }
    // Only switch when the file named an active view that actually exists.
    if !active_view.is_empty() && m.views.contains_key(&active_view) {
        m.active_view = active_view;
    }

    // Formulas and data can go in any order relative to each other
    for (raw, cat_name) in &formulas {
        m.add_formula(parse_formula(raw, cat_name).with_context(|| format!("Formula: {raw}"))?);
    }
    for (key, value) in data {
        m.set_cell(key, value);
    }

    Ok(m)
}
/// Split a trailing `[tag]` off a label.
///
/// `"Name [Bracket]"` yields `("Name", Some("Bracket"))`. The tag is whatever
/// sits between the last `[` and a closing `]` at the very end of `s`, and is
/// returned untrimmed; the label is trimmed. When `s` does not end in `]` or
/// contains no `[`, the whole trimmed input is returned with `None`.
fn parse_bracketed(s: &str) -> (&str, Option<&str>) {
    let split = s
        .strip_suffix(']')
        .and_then(|body| body.rsplit_once('['));
    match split {
        Some((label, tag)) => (label.trim(), Some(tag)),
        None => (s.trim(), None),
    }
}
/// Render a cell key as `Cat=Item, Cat=Item, …`, in the key's own pair order.
fn coord_str(key: &CellKey) -> String {
    let mut rendered = String::new();
    for (idx, (cat, item)) in key.0.iter().enumerate() {
        if idx > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(&format!("{}={}", cat, item));
    }
    rendered
}
pub fn export_csv(model: &Model, view_name: &str, path: &Path) -> Result<()> {
let view = model.views.get(view_name)
.ok_or_else(|| anyhow::anyhow!("View '{view_name}' not found"))?;
let layout = GridLayout::new(model, view);
let mut out = String::new();
// Header row
let row_header = layout.row_cats.join("/");
let page_label: Vec<String> = layout.page_coords.iter()
.map(|(c, v)| format!("{c}={v}")).collect();
let header_prefix = if page_label.is_empty() { row_header } else {
format!("{} ({})", row_header, page_label.join(", "))
};
if !header_prefix.is_empty() {
out.push_str(&header_prefix);
out.push(',');
}
let col_labels: Vec<String> = (0..layout.col_count()).map(|ci| layout.col_label(ci)).collect();
out.push_str(&col_labels.join(","));
out.push('\n');
// Data rows
for ri in 0..layout.row_count() {
let row_label = layout.row_label(ri);
if !row_label.is_empty() {
out.push_str(&row_label);
out.push(',');
}
let row_values: Vec<String> = (0..layout.col_count())
.map(|ci| layout.cell_key(ri, ci)
.and_then(|key| model.evaluate(&key))
.map(|v| v.to_string())
.unwrap_or_default())
.collect();
out.push_str(&row_values.join(","));
out.push('\n');
}
std::fs::write(path, out)?;
Ok(())
}
/// Unit tests for the `.improv` markdown serializer/parser and the `load`
/// format dispatcher.
#[cfg(test)]
mod tests {
    use super::{format_md, load, parse_md};
    use crate::model::Model;
    use crate::model::cell::{CellKey, CellValue};
    use crate::model::category::Group;
    use crate::view::Axis;
    use crate::formula::parse_formula;

    /// Build a `CellKey` from `(category, item)` string pairs.
    fn coord(pairs: &[(&str, &str)]) -> CellKey {
        CellKey::new(pairs.iter().map(|(c, i)| (c.to_string(), i.to_string())).collect())
    }

    /// A "Budget" model with categories Type {Food, Gas} and Month {Jan, Feb}.
    fn two_cat_model() -> Model {
        let mut m = Model::new("Budget");
        m.add_category("Type").unwrap();
        m.add_category("Month").unwrap();
        for item in ["Food", "Gas"] { m.category_mut("Type").unwrap().add_item(item); }
        for item in ["Jan", "Feb"] { m.category_mut("Month").unwrap().add_item(item); }
        m
    }

    // ── format_md ────────────────────────────────────────────────────────────

    #[test]
    fn format_md_contains_model_name() {
        let m = Model::new("My Model");
        assert!(format_md(&m).contains("# My Model"));
    }

    #[test]
    fn format_md_contains_category_and_items() {
        let m = two_cat_model();
        let text = format_md(&m);
        assert!(text.contains("## Category: Type"));
        assert!(text.contains("- Food"));
        assert!(text.contains("- Gas"));
        assert!(text.contains("## Category: Month"));
        assert!(text.contains("- Jan"));
    }

    #[test]
    fn format_md_item_with_group_uses_brackets() {
        let mut m = Model::new("T");
        m.add_category("Month").unwrap();
        m.category_mut("Month").unwrap().add_item_in_group("Jan", "Q1");
        let text = format_md(&m);
        assert!(text.contains("- Jan [Q1]"), "got:\n{text}");
    }

    #[test]
    fn format_md_group_hierarchy_uses_angle_prefix() {
        let mut m = Model::new("T");
        m.add_category("Month").unwrap();
        m.category_mut("Month").unwrap().add_item_in_group("Jan", "Q1");
        m.category_mut("Month").unwrap().add_group(Group::new("Q1").with_parent("2025"));
        let text = format_md(&m);
        assert!(text.contains("> Q1 [2025]"), "got:\n{text}");
    }

    #[test]
    fn format_md_data_is_sorted_and_quoted() {
        let mut m = two_cat_model();
        m.set_cell(coord(&[("Month", "Feb"), ("Type", "Food")]), CellValue::Number(200.0));
        m.set_cell(coord(&[("Month", "Jan"), ("Type", "Gas")]), CellValue::Text("N/A".into()));
        let text = format_md(&m);
        let data_pos = text.find("## Data").unwrap();
        let feb_pos = text.find("Month=Feb").unwrap();
        let jan_pos = text.find("Month=Jan").unwrap();
        assert!(data_pos < feb_pos && feb_pos < jan_pos,
            "expected sorted order Feb < Jan:\n{text}");
        assert!(text.contains("= 200"), "number not quoted:\n{text}");
        assert!(text.contains("= \"N/A\""), "text should be quoted:\n{text}");
    }

    #[test]
    fn format_md_view_axes_and_active_marker() {
        let m = two_cat_model();
        let text = format_md(&m);
        assert!(text.contains("## View: Default (active)"));
        assert!(text.contains("Type: row"));
        assert!(text.contains("Month: column"));
    }

    #[test]
    fn format_md_page_axis_with_selection() {
        let mut m = Model::new("T");
        m.add_category("Type").unwrap();
        m.add_category("Month").unwrap();
        m.add_category("Region").unwrap();
        m.category_mut("Type").unwrap().add_item("Food");
        m.category_mut("Month").unwrap().add_item("Jan");
        for r in ["East", "West"] { m.category_mut("Region").unwrap().add_item(r); }
        m.active_view_mut().set_page_selection("Region", "West");
        let text = format_md(&m);
        assert!(text.contains("Region: page, West"), "got:\n{text}");
    }

    #[test]
    fn format_md_formula_includes_category() {
        let mut m = two_cat_model();
        m.category_mut("Type").unwrap().add_item("Total");
        m.add_formula(parse_formula("Total = Food + Gas", "Type").unwrap());
        let text = format_md(&m);
        assert!(text.contains("- Total = Food + Gas [Type]"), "got:\n{text}");
    }

    // ── parse_md ─────────────────────────────────────────────────────────────

    #[test]
    fn parse_md_round_trips_model_name() {
        let m = Model::new("My Budget");
        assert_eq!(parse_md(&format_md(&m)).unwrap().name, "My Budget");
    }

    #[test]
    fn parse_md_round_trips_categories_and_items() {
        let m = two_cat_model();
        let m2 = parse_md(&format_md(&m)).unwrap();
        assert!(m2.category("Type").is_some());
        assert!(m2.category("Month").is_some());
        assert!(m2.category("Type").unwrap().item_by_name("Food").is_some());
        assert!(m2.category("Month").unwrap().item_by_name("Feb").is_some());
    }

    #[test]
    fn parse_md_round_trips_item_group() {
        let mut m = Model::new("T");
        m.add_category("Month").unwrap();
        m.category_mut("Month").unwrap().add_item_in_group("Jan", "Q1");
        let m2 = parse_md(&format_md(&m)).unwrap();
        let item = m2.category("Month").unwrap().item_by_name("Jan").unwrap();
        assert_eq!(item.group.as_deref(), Some("Q1"));
    }

    #[test]
    fn parse_md_round_trips_group_hierarchy() {
        let mut m = Model::new("T");
        m.add_category("Month").unwrap();
        m.category_mut("Month").unwrap().add_item_in_group("Jan", "Q1");
        m.category_mut("Month").unwrap().add_group(Group::new("Q1").with_parent("2025"));
        let m2 = parse_md(&format_md(&m)).unwrap();
        let groups = &m2.category("Month").unwrap().groups;
        let q1 = groups.iter().find(|g| g.name == "Q1").unwrap();
        assert_eq!(q1.parent.as_deref(), Some("2025"));
    }

    #[test]
    fn parse_md_round_trips_data_cells() {
        let mut m = two_cat_model();
        m.set_cell(coord(&[("Month", "Jan"), ("Type", "Food")]), CellValue::Number(100.0));
        m.set_cell(coord(&[("Month", "Feb"), ("Type", "Gas")]), CellValue::Text("N/A".into()));
        let m2 = parse_md(&format_md(&m)).unwrap();
        assert_eq!(m2.get_cell(&coord(&[("Month", "Jan"), ("Type", "Food")])),
            Some(&CellValue::Number(100.0)));
        assert_eq!(m2.get_cell(&coord(&[("Month", "Feb"), ("Type", "Gas")])),
            Some(&CellValue::Text("N/A".into())));
    }

    #[test]
    fn parse_md_round_trips_view_axes() {
        let m = two_cat_model();
        let m2 = parse_md(&format_md(&m)).unwrap();
        let v = m2.active_view();
        assert_eq!(v.axis_of("Type"), Axis::Row);
        assert_eq!(v.axis_of("Month"), Axis::Column);
    }

    #[test]
    fn parse_md_round_trips_page_selection() {
        let mut m = Model::new("T");
        m.add_category("Type").unwrap();
        m.add_category("Month").unwrap();
        m.add_category("Region").unwrap();
        m.category_mut("Type").unwrap().add_item("Food");
        m.category_mut("Month").unwrap().add_item("Jan");
        for r in ["East", "West"] { m.category_mut("Region").unwrap().add_item(r); }
        m.active_view_mut().set_page_selection("Region", "West");
        let m2 = parse_md(&format_md(&m)).unwrap();
        assert_eq!(m2.active_view().page_selection("Region"), Some("West"));
        assert_eq!(m2.active_view().axis_of("Region"), Axis::Page);
    }

    #[test]
    fn parse_md_round_trips_active_view() {
        let mut m = two_cat_model();
        m.create_view("Other");
        m.switch_view("Other").unwrap();
        let m2 = parse_md(&format_md(&m)).unwrap();
        assert_eq!(m2.active_view, "Other");
    }

    #[test]
    fn parse_md_round_trips_formula() {
        let mut m = two_cat_model();
        m.category_mut("Type").unwrap().add_item("Total");
        m.add_formula(parse_formula("Total = Food + Gas", "Type").unwrap());
        let m2 = parse_md(&format_md(&m)).unwrap();
        let f = &m2.formulas()[0];
        assert_eq!(f.raw, "Total = Food + Gas");
        assert_eq!(f.target_category, "Type");
    }

    #[test]
    fn parse_md_round_trips_hidden_item() {
        let mut m = two_cat_model();
        m.active_view_mut().hide_item("Type", "Gas");
        let m2 = parse_md(&format_md(&m)).unwrap();
        assert!(m2.active_view().is_hidden("Type", "Gas"));
        assert!(!m2.active_view().is_hidden("Type", "Food"));
    }

    #[test]
    fn parse_md_order_independent_view_before_categories() {
        // A hand-edited file with the view section before the category sections.
        // The parser must still produce correct axis assignments.
        let text = "# Test\n\
            ## View: Default (active)\n\
            Type: row\n\
            Month: column\n\
            ## Category: Type\n\
            - Food\n\
            ## Category: Month\n\
            - Jan\n";
        let m = parse_md(text).unwrap();
        assert_eq!(m.active_view().axis_of("Type"), Axis::Row);
        assert_eq!(m.active_view().axis_of("Month"), Axis::Column);
    }

    #[test]
    fn parse_md_order_independent_new_view_before_categories() {
        // A non-Default view with swapped axes, declared before categories exist.
        let text = "# Test\n\
            ## View: Transposed (active)\n\
            Type: column\n\
            Month: row\n\
            ## View: Default\n\
            Type: row\n\
            Month: column\n\
            ## Category: Type\n\
            - Food\n\
            ## Category: Month\n\
            - Jan\n";
        let m = parse_md(text).unwrap();
        let transposed = m.views.get("Transposed").unwrap();
        assert_eq!(transposed.axis_of("Type"), Axis::Column);
        assert_eq!(transposed.axis_of("Month"), Axis::Row);
        let default = m.views.get("Default").unwrap();
        assert_eq!(default.axis_of("Type"), Axis::Row);
        assert_eq!(default.axis_of("Month"), Axis::Column);
    }

    #[test]
    fn parse_md_order_independent_data_before_categories() {
        let text = "# Test\n\
            ## Data\n\
            Month=Jan, Type=Food = 42\n\
            ## Category: Type\n\
            - Food\n\
            ## Category: Month\n\
            - Jan\n\
            ## View: Default (active)\n\
            Type: row\n\
            Month: column\n";
        let m = parse_md(text).unwrap();
        assert_eq!(
            m.get_cell(&coord(&[("Month", "Jan"), ("Type", "Food")])),
            Some(&CellValue::Number(42.0))
        );
    }

    #[test]
    fn load_dispatcher_detects_legacy_json_by_brace() {
        // The load() function routes to JSON deserializer when text starts with '{'
        let m = two_cat_model();
        let json = serde_json::to_string_pretty(&m).unwrap();
        assert!(json.trim_start().starts_with('{'), "sanity check");

        // Go through `load` itself so the dispatch is what is under test.
        // The process id keeps concurrent test runs from sharing a file.
        let path = std::env::temp_dir()
            .join(format!("improvise-legacy-json-{}.improv", std::process::id()));
        std::fs::write(&path, &json).unwrap();
        let loaded = load(&path);
        // Remove the file before asserting so a failure does not leave it behind.
        let _ = std::fs::remove_file(&path);
        assert_eq!(loaded.unwrap().name, "Budget");
    }
}