// File: improvise/crates/improvise-io/src/persistence/mod.rs
// (2403 lines, 86 KiB, Rust)

use anyhow::{Context, Result};
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use pest::Parser;
use pest_derive::Parser;
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::Path;
use crate::formula::parse_formula;
use crate::model::category::Group;
use crate::model::cell::{CellKey, CellValue};
use crate::view::{Axis, GridLayout};
use crate::workbook::Workbook;
/// Parser type whose implementation is derived by `pest_derive` from the
/// grammar file `persistence/improv.pest`. The derive also provides the
/// `Rule` enum that `parse_md` matches on.
#[derive(Parser)]
#[grammar = "persistence/improv.pest"]
struct ImprovParser;
// ── Pipe quoting (shared between format and parse) ───────────────────────────
/// Report whether `name` can be written without pipe quoting, i.e. whether it
/// matches `[A-Za-z_][A-Za-z0-9_-]*`. The empty string is not a bare name.
fn is_bare_name(name: &str) -> bool {
    let Some(first) = name.chars().next() else {
        return false;
    };
    if !(first == '_' || first.is_ascii_alphabetic()) {
        return false;
    }
    name.chars()
        .skip(1)
        .all(|ch| matches!(ch, '_' | '-') || ch.is_ascii_alphanumeric())
}
/// Encode `s` so it can sit between `|` delimiters: a pipe becomes `\|`, a
/// backslash becomes `\\`, and a newline becomes `\n`. Every other character
/// is copied through unchanged.
fn escape_pipe(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut encoded, ch| {
            match ch {
                '|' => encoded.push_str("\\|"),
                '\\' => encoded.push_str("\\\\"),
                '\n' => encoded.push_str("\\n"),
                plain => encoded.push(plain),
            }
            encoded
        })
}
/// Decode the body of a pipe-quoted string: `\|` → `|`, `\\` → `\`,
/// `\n` → newline.
///
/// A backslash followed by any other character is kept verbatim (both
/// characters are emitted), and a lone trailing backslash is kept as well.
fn unescape_pipe(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    // True while the previous character was an unconsumed backslash.
    let mut after_backslash = false;
    for ch in s.chars() {
        if after_backslash {
            after_backslash = false;
            match ch {
                '|' | '\\' => decoded.push(ch),
                'n' => decoded.push('\n'),
                _ => {
                    decoded.push('\\');
                    decoded.push(ch);
                }
            }
        } else if ch == '\\' {
            after_backslash = true;
        } else {
            decoded.push(ch);
        }
    }
    if after_backslash {
        decoded.push('\\');
    }
    decoded
}
/// Render a name for the file format: bare identifiers are emitted as-is,
/// anything else is wrapped in CL-style `|...|` pipes with its body escaped.
fn quote_name(name: &str) -> String {
    if !is_bare_name(name) {
        return format!("|{}|", escape_pipe(name));
    }
    name.to_owned()
}
/// Wrap `s` in `|...|` regardless of its content, escaping the body. Used for
/// text cell values so they can always be told apart from numbers.
fn pipe_quote(s: &str) -> String {
    let body = escape_pipe(s);
    let mut quoted = String::with_capacity(body.len() + 2);
    quoted.push('|');
    quoted.push_str(&body);
    quoted.push('|');
    quoted
}
// ── Number formatting ────────────────────────────────────────────────────────
/// Render a cell number as text for the data section.
///
/// * NaN is written as `nan`, the infinities as `inf` / `-inf`.
/// * Whole numbers with magnitude below `1e15` are written as integers
///   (no fractional part).
/// * Everything else uses the `Display` form when it parses back to the same
///   value, and the `Debug` form otherwise.
fn format_number(n: f64) -> String {
    if n.is_nan() {
        return "nan".to_string();
    }
    if n.is_infinite() {
        let text = if n.is_sign_positive() { "inf" } else { "-inf" };
        return text.to_string();
    }
    let is_whole = n.fract() == 0.0;
    if is_whole && n.abs() < 1e15 {
        return (n as i64).to_string();
    }
    let shown = n.to_string();
    match shown.parse::<f64>() {
        Ok(back) if back == n => shown,
        _ => format!("{n:?}"),
    }
}
// ── File I/O ─────────────────────────────────────────────────────────────────
/// Report whether `path`, viewed as UTF-8 text, ends with `.gz`. Paths that
/// are not valid UTF-8 are never treated as gzip.
fn is_gzip(path: &Path) -> bool {
    match path.to_str() {
        Some(text) => text.ends_with(".gz"),
        None => false,
    }
}
/// Serialize `workbook` with [`format_md`] and write it to `path`.
///
/// When `path` ends in `.gz` the text is gzip-compressed; otherwise it is
/// written as plain text.
///
/// # Errors
///
/// Returns an error (annotated with the path) if the file cannot be created
/// or if any part of the write — including the final flush of buffered
/// bytes — fails.
pub fn save(workbook: &Workbook, path: &Path) -> Result<()> {
    let text = format_md(workbook);
    if is_gzip(path) {
        let file = std::fs::File::create(path)
            .with_context(|| format!("Cannot create {}", path.display()))?;
        let mut enc = GzEncoder::new(BufWriter::new(file), Compression::default());
        enc.write_all(text.as_bytes())
            .with_context(|| format!("Cannot write {}", path.display()))?;
        // `finish` writes the gzip trailer and hands back the BufWriter.
        // Flush it explicitly: dropping a BufWriter discards flush errors,
        // which would let a truncated file be reported as a successful save.
        let mut writer = enc
            .finish()
            .with_context(|| format!("Cannot write {}", path.display()))?;
        writer
            .flush()
            .with_context(|| format!("Cannot write {}", path.display()))?;
    } else {
        std::fs::write(path, &text).with_context(|| format!("Cannot write {}", path.display()))?;
    }
    Ok(())
}
/// Read a workbook from `path`.
///
/// Files whose path ends in `.gz` are decompressed first. If the resulting
/// text starts with `{` (ignoring leading whitespace) it is deserialized as
/// JSON; otherwise it is parsed with [`parse_md`].
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read (both annotated with
/// the path), or if deserialization / parsing fails.
pub fn load(path: &Path) -> Result<Workbook> {
    let file =
        std::fs::File::open(path).with_context(|| format!("Cannot open {}", path.display()))?;
    let mut text = String::new();
    let read_result = if is_gzip(path) {
        GzDecoder::new(BufReader::new(file)).read_to_string(&mut text)
    } else {
        BufReader::new(file).read_to_string(&mut text)
    };
    // Attach the path so corrupt gzip data or invalid UTF-8 is reported with
    // the same context as a failed open.
    read_result.with_context(|| format!("Cannot read {}", path.display()))?;
    if text.trim_start().starts_with('{') {
        serde_json::from_str(&text).context("Failed to deserialize workbook")
    } else {
        parse_md(&text)
    }
}
/// Derive the autosave location for `path`: the same directory, with the file
/// name replaced by `.<name>.autosave`.
///
/// When `path` has no file name, or the name is not valid UTF-8, `model` is
/// used as the name.
pub fn autosave_path(path: &Path) -> std::path::PathBuf {
    let base = match path.file_name().and_then(|os| os.to_str()) {
        Some(text) => text,
        None => "model",
    };
    let hidden_name = format!(".{base}.autosave");
    let mut autosave = path.to_path_buf();
    autosave.set_file_name(hidden_name);
    autosave
}
/// Serialize a workbook to the markdown `.improv` format.
///
/// Output order: version line, `# <model name>`, `Initial View:`, one
/// `## View:` section per view, `## Formulas` (if any), one `## Category:`
/// section per persisted category, and finally `## Data` (if any cells),
/// sorted by the rendered coordinate text.
///
/// Categories of kind `VirtualIndex` / `VirtualDim` are not written. Items of
/// a `VirtualMeasure` category that are targets of `_Measure` formulas are
/// omitted from the category section.
pub fn format_md(workbook: &Workbook) -> String {
    use crate::model::category::CategoryKind;

    // writeln! to a String is infallible; this macro avoids .unwrap() noise.
    macro_rules! w {
        ($dst:expr, $($arg:tt)*) => { { use std::fmt::Write; writeln!($dst, $($arg)*).ok(); } }
    }

    let model = &workbook.model;
    let mut out = String::new();
    w!(out, "v2025-04-09");
    w!(out, "# {}", model.name);
    w!(out, "Initial View: {}", workbook.active_view);

    // ── Views (first: typically small, orients the reader) ───────────
    for (_view_name, view) in &workbook.views {
        w!(out, "\n## View: {}", view.name);
        for (cat, axis) in &view.category_axes {
            let qcat = quote_name(cat);
            // A page axis with a stored selection carries it on the same line.
            if *axis == Axis::Page
                && let Some(sel) = view.page_selections.get(cat)
            {
                w!(out, "{qcat}: page, {}", quote_name(sel));
                continue;
            }
            let axis_str = match axis {
                Axis::Row => "row",
                Axis::Column => "column",
                Axis::Page => "page",
                Axis::None => "none",
            };
            w!(out, "{qcat}: {axis_str}");
        }
        if !view.number_format.is_empty() {
            w!(out, "format: {}", view.number_format);
        }
        for (prefix, map) in [
            ("hidden", &view.hidden_items),
            ("collapsed", &view.collapsed_groups),
        ] {
            // Flatten to (category, item) pairs and sort for stable output.
            let mut pairs: Vec<_> = map
                .iter()
                .flat_map(|(cat, items)| {
                    items.iter().map(move |item| (cat.as_str(), item.as_str()))
                })
                .collect();
            pairs.sort();
            for (cat, item) in pairs {
                w!(out, "{prefix}: {}/{}", quote_name(cat), quote_name(item));
            }
        }
    }

    // ── Formulas ─────────────────────────────────────────────────────
    if !model.formulas().is_empty() {
        w!(out, "\n## Formulas");
        for f in model.formulas() {
            // `_Measure` is the default target category and is left implicit.
            if f.target_category == "_Measure" {
                w!(out, "- {}", f.raw);
            } else {
                w!(out, "- {} [{}]", f.raw, f.target_category);
            }
        }
    }

    // ── Categories (items comma-separated on one line) ───────────────
    // Collect formula targets so we can exclude them from _Measure items
    let formula_targets: std::collections::HashSet<&str> = model
        .formulas()
        .iter()
        .filter(|f| f.target_category == "_Measure")
        .map(|f| f.target.as_str())
        .collect();
    for cat in model.categories.values() {
        // Skip _Index and _Dim — they are fully virtual, never persisted
        if matches!(
            cat.kind,
            CategoryKind::VirtualIndex | CategoryKind::VirtualDim
        ) {
            continue;
        }
        w!(out, "\n## Category: {}", cat.name);
        let mut bare: Vec<String> = Vec::new();
        let mut grouped: Vec<String> = Vec::new();
        for item in cat.items.values() {
            // For _Measure, skip items that are formula targets
            // (they'll be recreated from the ## Formulas section)
            if cat.kind == CategoryKind::VirtualMeasure
                && formula_targets.contains(item.name.as_str())
            {
                continue;
            }
            match &item.group {
                Some(g) => grouped.push(format!("{}[{}]", quote_name(&item.name), quote_name(g))),
                None => bare.push(quote_name(&item.name)),
            }
        }
        if !bare.is_empty() {
            w!(out, "- {}", bare.join(", "));
        }
        for g_item in &grouped {
            w!(out, "- {g_item}");
        }
        for g in &cat.groups {
            if let Some(parent) = &g.parent {
                w!(out, "> {}[{}]", quote_name(&g.name), quote_name(parent));
            }
        }
    }

    // ── Data (last: typically the largest section) ────────────────────
    // Render each coordinate string exactly once and sort on it, instead of
    // rebuilding the string for every comparison and again when writing.
    let mut cells: Vec<_> = model
        .data
        .iter_cells()
        .map(|(key, value)| (coord_str(&key), value))
        .collect();
    cells.sort_by(|a, b| a.0.cmp(&b.0));
    if !cells.is_empty() {
        w!(out, "\n## Data");
        for (coords, value) in cells {
            let val_str = match value {
                CellValue::Number(n) => format_number(*n),
                CellValue::Text(s) | CellValue::Error(s) => pipe_quote(s),
            };
            w!(out, "{} = {}", coords, val_str);
        }
    }
    out
}
/// Parse the `.improv` text format into a [`Workbook`] using the pest grammar.
///
/// Sections may appear in any order: pass 1 walks the parse tree and collects
/// everything into intermediate structures, pass 2 builds the workbook in a
/// fixed order (categories, views, initial view, formulas, data) so that
/// categories are registered before views are configured.
///
/// # Errors
///
/// Returns an error when the text does not match the grammar, the model title
/// (`# Name`) is missing, an axis kind is unknown, a numeric data value cannot
/// be parsed as `f64`, a category cannot be added, or a formula fails to parse.
pub fn parse_md(text: &str) -> Result<Workbook> {
    use anyhow::bail;
    use pest::iterators::{Pair, Pairs};

    let file = ImprovParser::parse(Rule::file, text)
        .map_err(|e| anyhow::anyhow!("Parse error: {e}"))?
        .next()
        .ok_or_else(|| anyhow::anyhow!("Empty parse result"))?;

    // ── Intermediate collectors ──────────────────────────────────────────────
    /// One `## Category:` section as read from the file.
    struct PCategory {
        name: String,
        /// (item name, optional group name)
        items: Vec<(String, Option<String>)>,
        /// (group name, parent group name)
        group_parents: Vec<(String, String)>,
    }
    /// One `## View:` section as read from the file.
    struct PView {
        name: String,
        axes: Vec<(String, Axis)>,
        page_selections: Vec<(String, String)>,
        format: String,
        hidden: Vec<(String, String)>,
        collapsed: Vec<(String, String)>,
    }

    let mut model_name: Option<String> = None;
    let mut initial_view: Option<String> = None;
    let mut categories: Vec<PCategory> = Vec::new();
    // (raw formula text, target category)
    let mut formulas: Vec<(String, String)> = Vec::new();
    let mut data: Vec<(CellKey, CellValue)> = Vec::new();
    let mut views: Vec<PView> = Vec::new();

    // ── Helpers for walking the pest parse tree ──────────────────────────────
    /// Advance an iterator, returning an error if empty.
    fn next<'a>(pairs: &mut Pairs<'a, Rule>, ctx: &str) -> Result<Pair<'a, Rule>> {
        pairs
            .next()
            .ok_or_else(|| anyhow::anyhow!("Expected child in {ctx}"))
    }

    /// Extract the first child's text content, trimmed.
    fn first_str(pair: Pair<'_, Rule>) -> Result<String> {
        Ok(next(&mut pair.into_inner(), "first_str")?
            .as_str()
            .trim()
            .to_string())
    }

    /// Turn a name pair into its string value, unescaping pipe-quoted names.
    fn extract_name(pair: Pair<'_, Rule>) -> Result<String> {
        match pair.as_rule() {
            Rule::bare_name => Ok(pair.as_str().to_string()),
            Rule::pipe_quoted => {
                let inner = next(&mut pair.into_inner(), "pipe_quoted")?;
                Ok(unescape_pipe(inner.as_str()))
            }
            _ => Ok(pair.as_str().to_string()),
        }
    }

    /// Extract two names from a pair's children.
    fn extract_name_pair(pair: Pair<'_, Rule>) -> Result<(String, String)> {
        let ctx = format!("{:?}", pair.as_rule());
        let mut parts = pair.into_inner();
        let a = extract_name(next(&mut parts, &ctx)?)?;
        let b = extract_name(next(&mut parts, &ctx)?)?;
        Ok((a, b))
    }

    // ── Pass 1: walk the parse tree ─────────────────────────────────────────
    for pair in file.into_inner() {
        match pair.as_rule() {
            Rule::version_line | Rule::EOI => {}
            Rule::model_name => {
                model_name = Some(first_str(pair)?);
            }
            Rule::initial_view => {
                initial_view = Some(first_str(pair)?);
            }
            Rule::category_section => {
                let mut inner = pair.into_inner();
                let cname = next(&mut inner, "category_section")?
                    .as_str()
                    .trim()
                    .to_string();
                let mut pc = PCategory {
                    name: cname,
                    items: Vec::new(),
                    group_parents: Vec::new(),
                };
                for entry in inner {
                    match entry.as_rule() {
                        Rule::item_list => {
                            for name_pair in entry.into_inner() {
                                pc.items.push((extract_name(name_pair)?, None));
                            }
                        }
                        Rule::grouped_item => {
                            let (name, group) = extract_name_pair(entry)?;
                            pc.items.push((name, Some(group)));
                        }
                        Rule::group_hierarchy => {
                            pc.group_parents.push(extract_name_pair(entry)?);
                        }
                        _ => {}
                    }
                }
                categories.push(pc);
            }
            Rule::formulas_section => {
                for fl in pair.into_inner() {
                    if fl.as_rule() == Rule::formula_line {
                        let raw = first_str(fl)?;
                        // A trailing ` [Category]` names the target category.
                        if let Some(i) = raw.rfind(" [")
                            && raw.ends_with(']')
                        {
                            formulas.push((
                                raw[..i].to_string(),
                                raw[i + 2..raw.len() - 1].to_string(),
                            ));
                            continue;
                        }
                        // No [Category] suffix — default to _Measure
                        if !raw.is_empty() && raw.contains('=') {
                            formulas.push((raw, "_Measure".to_string()));
                        }
                    }
                }
            }
            Rule::data_section => {
                for dl in pair.into_inner() {
                    if dl.as_rule() == Rule::data_line {
                        let mut dl_inner = dl.into_inner();
                        let coord_list = next(&mut dl_inner, "data_line coords")?;
                        let value_pair = next(&mut dl_inner, "data_line value")?;
                        let coords: Vec<_> = coord_list
                            .into_inner()
                            .filter(|p| p.as_rule() == Rule::coord)
                            .map(extract_name_pair)
                            .collect::<Result<_>>()?;
                        let value = match value_pair.as_rule() {
                            Rule::number => {
                                // Fail loudly rather than silently storing 0
                                // for a token that f64 cannot parse.
                                let raw = value_pair.as_str();
                                let parsed: f64 = raw.parse().with_context(|| {
                                    format!("Invalid number '{raw}' in data section")
                                })?;
                                CellValue::Number(parsed)
                            }
                            Rule::pipe_quoted => {
                                let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
                                CellValue::Text(unescape_pipe(inner.as_str()))
                            }
                            // Bare words: the non-finite number spellings
                            // written by format_number, otherwise plain text.
                            Rule::bare_value => match value_pair.as_str().trim() {
                                "inf" => CellValue::Number(f64::INFINITY),
                                "-inf" => CellValue::Number(f64::NEG_INFINITY),
                                "nan" => CellValue::Number(f64::NAN),
                                s => CellValue::Text(s.to_string()),
                            },
                            _ => CellValue::Text(value_pair.as_str().to_string()),
                        };
                        data.push((CellKey::new(coords), value));
                    }
                }
            }
            Rule::view_section => {
                let mut inner = pair.into_inner();
                let vname = next(&mut inner, "view_section")?
                    .as_str()
                    .trim()
                    .to_string();
                let mut pv = PView {
                    name: vname,
                    axes: Vec::new(),
                    page_selections: Vec::new(),
                    format: String::new(),
                    hidden: Vec::new(),
                    collapsed: Vec::new(),
                };
                for entry in inner {
                    match entry.as_rule() {
                        Rule::axis_line => {
                            let mut parts = entry.into_inner();
                            let cat = extract_name(next(&mut parts, "axis cat")?)?;
                            let kind_str = next(&mut parts, "axis kind")?.as_str();
                            let axis = match kind_str {
                                "row" => Axis::Row,
                                "column" => Axis::Column,
                                "page" => Axis::Page,
                                "none" => Axis::None,
                                _ => bail!("Unknown axis kind: {kind_str}"),
                            };
                            pv.axes.push((cat.clone(), axis));
                            // An optional third child on a page axis is the
                            // selected item.
                            if axis == Axis::Page
                                && let Some(sel_pair) = parts.next()
                            {
                                pv.page_selections.push((cat, extract_name(sel_pair)?));
                            }
                        }
                        Rule::format_line => pv.format = first_str(entry)?,
                        Rule::hidden_line => pv.hidden.push(extract_name_pair(entry)?),
                        Rule::collapsed_line => pv.collapsed.push(extract_name_pair(entry)?),
                        _ => {}
                    }
                }
                views.push(pv);
            }
            _ => {}
        }
    }

    // ── Pass 2: build the Workbook ──────────────────────────────────────────
    let name = model_name.ok_or_else(|| anyhow::anyhow!("Missing model title (# Name)"))?;
    let mut wb = Workbook::new(&name);

    for pc in &categories {
        wb.add_category(&pc.name)?;
        let cat = wb
            .model
            .category_mut(&pc.name)
            .ok_or_else(|| anyhow::anyhow!("Category '{}' not found after add", pc.name))?;
        for (item_name, group) in &pc.items {
            match group {
                Some(g) => {
                    cat.add_item_in_group(item_name, g);
                    // Make sure the group itself exists in the group list.
                    if !cat.groups.iter().any(|e| &e.name == g) {
                        cat.add_group(Group::new(g));
                    }
                }
                None => {
                    cat.add_item(item_name);
                }
            }
        }
        for (group_name, parent) in &pc.group_parents {
            match cat.groups.iter_mut().find(|g| &g.name == group_name) {
                Some(g) => g.parent = Some(parent.clone()),
                None => cat.add_group(Group::new(group_name).with_parent(parent)),
            }
        }
    }

    for pv in &views {
        if !wb.views.contains_key(&pv.name) {
            wb.create_view(&pv.name);
        }
        let view = wb
            .views
            .get_mut(&pv.name)
            .ok_or_else(|| anyhow::anyhow!("View '{}' not found after create", pv.name))?;
        for (cat, axis) in &pv.axes {
            view.set_axis(cat, *axis);
        }
        for (cat, sel) in &pv.page_selections {
            view.set_page_selection(cat, sel);
        }
        if !pv.format.is_empty() {
            view.number_format = pv.format.clone();
        }
        for (cat, item) in &pv.hidden {
            view.hide_item(cat, item);
        }
        // NOTE(review): this toggles rather than sets; presumably views start
        // with nothing collapsed, so a duplicated `collapsed:` line would undo
        // itself — confirm against View::toggle_group_collapse.
        for (cat, grp) in &pv.collapsed {
            view.toggle_group_collapse(cat, grp);
        }
    }

    // Only honour `Initial View:` when it names a view that actually exists.
    if let Some(iv) = &initial_view
        && wb.views.contains_key(iv)
    {
        wb.active_view = iv.clone();
    }

    for (raw, cat_name) in &formulas {
        wb.model
            .add_formula(parse_formula(raw, cat_name).with_context(|| format!("Formula: {raw}"))?);
    }
    for (key, value) in data {
        wb.model.set_cell(key, value);
    }
    Ok(wb)
}
/// Render a cell key as `cat=item, cat=item, ...`, quoting each name with
/// `quote_name`. The pairs appear in the order stored in the key.
fn coord_str(key: &CellKey) -> String {
    let mut rendered = String::new();
    for (idx, (cat, item)) in key.0.iter().enumerate() {
        if idx > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(&quote_name(cat));
        rendered.push('=');
        rendered.push_str(&quote_name(item));
    }
    rendered
}
pub fn export_csv(workbook: &Workbook, view_name: &str, path: &Path) -> Result<()> {
let view = workbook
.views
.get(view_name)
.ok_or_else(|| anyhow::anyhow!("View '{view_name}' not found"))?;
let layout = GridLayout::new(&workbook.model, view);
let model = &workbook.model;
let mut out = String::new();
// Header row
let row_header = layout.row_cats.join("/");
let page_label: Vec<String> = layout
.page_coords
.iter()
.map(|(c, v)| format!("{c}={v}"))
.collect();
let header_prefix = if page_label.is_empty() {
row_header
} else {
format!("{} ({})", row_header, page_label.join(", "))
};
if !header_prefix.is_empty() {
out.push_str(&header_prefix);
out.push(',');
}
let col_labels: Vec<String> = (0..layout.col_count())
.map(|ci| layout.col_label(ci))
.collect();
out.push_str(&col_labels.join(","));
out.push('\n');
// Data rows
for ri in 0..layout.row_count() {
let row_label = layout.row_label(ri);
if !row_label.is_empty() {
out.push_str(&row_label);
out.push(',');
}
let row_values: Vec<String> = (0..layout.col_count())
.map(|ci| layout.display_text(model, ri, ci, false, 0))
.collect();
out.push_str(&row_values.join(","));
out.push('\n');
}
std::fs::write(path, out)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::{format_md, parse_md};
use crate::formula::parse_formula;
use crate::model::category::Group;
use crate::model::cell::{CellKey, CellValue};
use crate::view::Axis;
use crate::workbook::Workbook;
/// Test helper: build a `CellKey` from borrowed `(category, item)` pairs.
fn coord(pairs: &[(&str, &str)]) -> CellKey {
    let mut owned = Vec::with_capacity(pairs.len());
    for &(cat, item) in pairs {
        owned.push((cat.to_string(), item.to_string()));
    }
    CellKey::new(owned)
}
/// Test fixture: a workbook named "Budget" with categories `Type`
/// (Food, Gas) and `Month` (Jan, Feb) and no data.
fn two_cat_model() -> Workbook {
    let layout = [("Type", ["Food", "Gas"]), ("Month", ["Jan", "Feb"])];
    let mut wb = Workbook::new("Budget");
    // Register both categories before adding any items.
    for (cat, _) in layout {
        wb.add_category(cat).unwrap();
    }
    for (cat, items) in layout {
        for item in items {
            wb.model.category_mut(cat).unwrap().add_item(item);
        }
    }
    wb
}
// ── format_md ────────────────────────────────────────────────────────────
#[test]
fn format_md_contains_model_name() {
let m = Workbook::new("My Model");
assert!(format_md(&m).contains("# My Model"));
}
#[test]
fn format_md_contains_category_and_items() {
let m = two_cat_model();
let text = format_md(&m);
assert!(text.contains("## Category: Type"));
// Bare items are now comma-separated on one line
assert!(
text.contains("- Food, Gas"),
"expected comma-separated items:\n{text}"
);
assert!(text.contains("## Category: Month"));
assert!(text.contains("Jan"));
}
#[test]
fn format_md_item_with_group_uses_brackets() {
let mut m = Workbook::new("T");
m.add_category("Month").unwrap();
m.model
.category_mut("Month")
.unwrap()
.add_item_in_group("Jan", "Q1");
let text = format_md(&m);
assert!(text.contains("- Jan[Q1]"), "got:\n{text}");
}
#[test]
fn format_md_group_hierarchy_uses_angle_prefix() {
let mut m = Workbook::new("T");
m.add_category("Month").unwrap();
m.model
.category_mut("Month")
.unwrap()
.add_item_in_group("Jan", "Q1");
m.model
.category_mut("Month")
.unwrap()
.add_group(Group::new("Q1").with_parent("2025"));
let text = format_md(&m);
assert!(text.contains("> Q1[|2025|]"), "got:\n{text}");
}
#[test]
fn format_md_data_is_sorted_and_quoted() {
let mut m = two_cat_model();
m.model.set_cell(
coord(&[("Month", "Feb"), ("Type", "Food")]),
CellValue::Number(200.0),
);
m.model.set_cell(
coord(&[("Month", "Jan"), ("Type", "Gas")]),
CellValue::Text("N/A".into()),
);
let text = format_md(&m);
let data_pos = text.find("## Data").unwrap();
let feb_pos = text.find("Month=Feb").unwrap();
let jan_pos = text.find("Month=Jan").unwrap();
assert!(
data_pos < feb_pos && feb_pos < jan_pos,
"expected sorted order Feb < Jan:\n{text}"
);
assert!(text.contains("= 200"), "number not quoted:\n{text}");
assert!(
text.contains("= |N/A|"),
"text should be pipe-quoted:\n{text}"
);
}
#[test]
fn format_md_view_axes() {
let m = two_cat_model();
let text = format_md(&m);
assert!(text.contains("## View: Default"));
assert!(text.contains("Type: row"));
assert!(text.contains("Month: column"));
}
#[test]
fn format_md_page_axis_with_selection() {
let mut m = Workbook::new("T");
m.add_category("Type").unwrap();
m.add_category("Month").unwrap();
m.add_category("Region").unwrap();
m.model.category_mut("Type").unwrap().add_item("Food");
m.model.category_mut("Month").unwrap().add_item("Jan");
for r in ["East", "West"] {
m.model.category_mut("Region").unwrap().add_item(r);
}
m.active_view_mut().set_page_selection("Region", "West");
let text = format_md(&m);
assert!(text.contains("Region: page, West"), "got:\n{text}");
}
#[test]
fn format_md_formula_includes_category() {
let mut m = two_cat_model();
m.model.category_mut("Type").unwrap().add_item("Total");
m.model
.add_formula(parse_formula("Total = Food + Gas", "Type").unwrap());
let text = format_md(&m);
assert!(text.contains("- Total = Food + Gas [Type]"), "got:\n{text}");
}
// ── parse_md ─────────────────────────────────────────────────────────────
#[test]
fn parse_md_round_trips_model_name() {
let m = Workbook::new("My Budget");
assert_eq!(parse_md(&format_md(&m)).unwrap().model.name, "My Budget");
}
#[test]
fn parse_md_round_trips_categories_and_items() {
let m = two_cat_model();
let m2 = parse_md(&format_md(&m)).unwrap();
assert!(
m2.model
.category("Type")
.and_then(|c| c.items.get("Food"))
.is_some()
);
assert!(
m2.model
.category("Month")
.and_then(|c| c.items.get("Feb"))
.is_some()
);
}
#[test]
fn parse_md_round_trips_item_group() {
let mut m = Workbook::new("T");
m.add_category("Month").unwrap();
m.model
.category_mut("Month")
.unwrap()
.add_item_in_group("Jan", "Q1");
let m2 = parse_md(&format_md(&m)).unwrap();
assert_eq!(
m2.model
.category("Month")
.and_then(|c| c.items.get("Jan"))
.and_then(|i| i.group.as_deref()),
Some("Q1")
);
}
#[test]
fn parse_md_round_trips_group_hierarchy() {
let mut m = Workbook::new("T");
m.add_category("Month").unwrap();
m.model
.category_mut("Month")
.unwrap()
.add_item_in_group("Jan", "Q1");
m.model
.category_mut("Month")
.unwrap()
.add_group(Group::new("Q1").with_parent("2025"));
let m2 = parse_md(&format_md(&m)).unwrap();
let groups = &m2.model.category("Month").unwrap().groups;
let q1 = groups.iter().find(|g| g.name == "Q1").unwrap();
assert_eq!(q1.parent.as_deref(), Some("2025"));
}
#[test]
fn parse_md_round_trips_data_cells() {
let mut m = two_cat_model();
m.model.set_cell(
coord(&[("Month", "Jan"), ("Type", "Food")]),
CellValue::Number(100.0),
);
m.model.set_cell(
coord(&[("Month", "Feb"), ("Type", "Gas")]),
CellValue::Text("N/A".into()),
);
let m2 = parse_md(&format_md(&m)).unwrap();
assert_eq!(
m2.model
.get_cell(&coord(&[("Month", "Jan"), ("Type", "Food")])),
Some(&CellValue::Number(100.0))
);
assert_eq!(
m2.model
.get_cell(&coord(&[("Month", "Feb"), ("Type", "Gas")])),
Some(&CellValue::Text("N/A".into()))
);
}
#[test]
fn parse_md_round_trips_view_axes() {
let m = two_cat_model();
let m2 = parse_md(&format_md(&m)).unwrap();
let v = m2.active_view();
assert_eq!(v.axis_of("Type"), Axis::Row);
assert_eq!(v.axis_of("Month"), Axis::Column);
}
#[test]
fn parse_md_round_trips_page_selection() {
let mut m = Workbook::new("T");
m.add_category("Type").unwrap();
m.add_category("Month").unwrap();
m.add_category("Region").unwrap();
m.model.category_mut("Type").unwrap().add_item("Food");
m.model.category_mut("Month").unwrap().add_item("Jan");
for r in ["East", "West"] {
m.model.category_mut("Region").unwrap().add_item(r);
}
m.active_view_mut().set_page_selection("Region", "West");
let m2 = parse_md(&format_md(&m)).unwrap();
assert_eq!(m2.active_view().page_selection("Region"), Some("West"));
assert_eq!(m2.active_view().axis_of("Region"), Axis::Page);
}
// The active view is persisted as the `Initial View:` header line and
// restored by parse_md when that view exists.
#[test]
fn parse_md_round_trips_formula() {
let mut m = two_cat_model();
m.model.category_mut("Type").unwrap().add_item("Total");
m.model
.add_formula(parse_formula("Total = Food + Gas", "Type").unwrap());
let m2 = parse_md(&format_md(&m)).unwrap();
let f = &m2.model.formulas()[0];
assert_eq!(f.raw, "Total = Food + Gas");
assert_eq!(f.target_category, "Type");
}
/// Hiding one item must survive a format/parse round trip without hiding
/// its siblings.
#[test]
fn parse_md_round_trips_hidden_item() {
    let mut m = two_cat_model();
    m.active_view_mut().hide_item("Type", "Gas");
    let m2 = parse_md(&format_md(&m)).unwrap();
    assert!(m2.active_view().is_hidden("Type", "Gas"));
    assert!(!m2.active_view().is_hidden("Type", "Food"));
}
#[test]
fn parse_md_order_independent_view_before_categories() {
// A hand-edited file with the view section before the category sections.
// The parser must still produce correct axis assignments.
let text = "v2025-04-09\n# Test\n\
## View: Default\n\
Type: row\n\
Month: column\n\
## Category: Type\n\
- Food\n\
## Category: Month\n\
- Jan\n";
let m = parse_md(text).unwrap();
assert_eq!(m.active_view().axis_of("Type"), Axis::Row);
assert_eq!(m.active_view().axis_of("Month"), Axis::Column);
}
#[test]
fn parse_md_order_independent_new_view_before_categories() {
let text = "v2025-04-09\n# Test\n\
## View: Transposed\n\
Type: column\n\
Month: row\n\
## View: Default\n\
Type: row\n\
Month: column\n\
## Category: Type\n\
- Food\n\
## Category: Month\n\
- Jan\n";
let m = parse_md(text).unwrap();
let transposed = m.views.get("Transposed").unwrap();
assert_eq!(transposed.axis_of("Type"), Axis::Column);
assert_eq!(transposed.axis_of("Month"), Axis::Row);
let default = m.views.get("Default").unwrap();
assert_eq!(default.axis_of("Type"), Axis::Row);
assert_eq!(default.axis_of("Month"), Axis::Column);
}
#[test]
fn parse_md_order_independent_data_before_categories() {
let text = "v2025-04-09\n# Test\n\
## Data\n\
Month=Jan, Type=Food = 42\n\
## Category: Type\n\
- Food\n\
## Category: Month\n\
- Jan\n\
## View: Default\n\
Type: row\n\
Month: column\n";
let m = parse_md(text).unwrap();
assert_eq!(
m.model
.get_cell(&coord(&[("Month", "Jan"), ("Type", "Food")])),
Some(&CellValue::Number(42.0))
);
}
/// `load()` must route text that starts with `{` to the JSON deserializer
/// instead of the `.improv` parser.
#[test]
fn load_dispatcher_detects_legacy_json_by_brace() {
    let m = two_cat_model();
    let json = serde_json::to_string_pretty(&m).unwrap();
    assert!(json.trim_start().starts_with('{'), "sanity check");
    // Go through load() itself so the brace-detection branch is exercised.
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("legacy.improv");
    std::fs::write(&path, &json).unwrap();
    let m2 = super::load(&path).unwrap();
    assert_eq!(m2.model.name, "Budget");
}
// ── save/load roundtrip via file ────────────────────────────────────
#[test]
fn save_and_load_roundtrip_plain() {
let mut m = two_cat_model();
m.model.set_cell(
coord(&[("Type", "Food"), ("Month", "Jan")]),
CellValue::Number(42.0),
);
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("test.improv");
super::save(&m, &path).unwrap();
let loaded = super::load(&path).unwrap();
assert_eq!(loaded.model.name, "Budget");
assert_eq!(
loaded
.model
.get_cell(&coord(&[("Type", "Food"), ("Month", "Jan")])),
Some(&CellValue::Number(42.0))
);
}
#[test]
fn save_and_load_roundtrip_gzip() {
let mut m = two_cat_model();
m.model.set_cell(
coord(&[("Type", "Gas"), ("Month", "Feb")]),
CellValue::Number(99.0),
);
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("test.improv.gz");
super::save(&m, &path).unwrap();
let loaded = super::load(&path).unwrap();
assert_eq!(loaded.model.name, "Budget");
assert_eq!(
loaded
.model
.get_cell(&coord(&[("Type", "Gas"), ("Month", "Feb")])),
Some(&CellValue::Number(99.0))
);
}
// ── autosave_path ───────────────────────────────────────────────────
#[test]
fn autosave_path_inserts_dot_prefix() {
let p = std::path::Path::new("/home/user/data/budget.improv");
let auto = super::autosave_path(p);
assert_eq!(
auto.file_name().unwrap().to_str().unwrap(),
".budget.improv.autosave"
);
}
// ── format_md: collapsed groups ─────────────────────────────────────
#[test]
fn format_md_collapsed_group() {
let mut m = two_cat_model();
m.active_view_mut().toggle_group_collapse("Type", "MyGroup");
let text = format_md(&m);
assert!(text.contains("collapsed: Type/MyGroup"));
}
// ── format_md: page axis without selection ──────────────────────────
#[test]
fn format_md_page_without_selection() {
let mut m = two_cat_model();
m.active_view_mut().set_axis("Month", Axis::Page);
// Don't set a page selection
let text = format_md(&m);
assert!(text.contains("Month: page"));
// Should NOT have a comma after "page" (no selection)
let line = text.lines().find(|l| l.starts_with("Month:")).unwrap();
assert!(!line.contains(','), "Expected no selection, got: {line}");
}
// ── format_md: none axis ────────────────────────────────────────────
#[test]
fn format_md_none_axis() {
let mut m = two_cat_model();
m.active_view_mut().set_axis("Month", Axis::None);
let text = format_md(&m);
assert!(text.contains("Month: none"));
}
// ── format_md: number format ────────────────────────────────────────
#[test]
fn format_md_includes_number_format() {
let mut m = two_cat_model();
m.active_view_mut().number_format = ",.2f".to_string();
let text = format_md(&m);
assert!(text.contains("format: ,.2f"));
}
// ── parse_md: comments and blank lines ──────────────────────────────
#[test]
fn parse_md_ignores_blank_and_comment_lines() {
let text = r#"v2025-04-09
# Test Model
## Category: Type
- Food, Gas
## Data
Type=Food = 42
"#;
let m = parse_md(text).unwrap();
assert_eq!(m.model.name, "Test Model");
assert!(m.model.category("Type").is_some());
}
// ── parse_md: text values ───────────────────────────────────────────
#[test]
fn parse_md_round_trips_text_cell_values() {
let mut m = two_cat_model();
m.model.set_cell(
coord(&[("Type", "Food"), ("Month", "Jan")]),
CellValue::Text("pending".to_string()),
);
let text = format_md(&m);
let loaded = parse_md(&text).unwrap();
assert_eq!(
loaded
.model
.get_cell(&coord(&[("Type", "Food"), ("Month", "Jan")])),
Some(&CellValue::Text("pending".to_string()))
);
}
// ── parse_md: collapsed groups roundtrip ────────────────────────────
#[test]
fn parse_md_round_trips_collapsed_group() {
let mut m = two_cat_model();
m.active_view_mut().toggle_group_collapse("Type", "MyGroup");
let text = format_md(&m);
let loaded = parse_md(&text).unwrap();
assert!(loaded.active_view().is_group_collapsed("Type", "MyGroup"));
}
// ── parse_md: number format roundtrip ───────────────────────────────
#[test]
fn parse_md_round_trips_number_format() {
let mut m = two_cat_model();
m.active_view_mut().number_format = ",.2f".to_string();
let text = format_md(&m);
let loaded = parse_md(&text).unwrap();
assert_eq!(loaded.active_view().number_format, ",.2f");
}
// ── parse_md: none axis roundtrip ───────────────────────────────────
#[test]
fn parse_md_round_trips_none_axis() {
let mut m = two_cat_model();
m.active_view_mut().set_axis("Month", Axis::None);
let text = format_md(&m);
let loaded = parse_md(&text).unwrap();
assert_eq!(loaded.active_view().axis_of("Month"), Axis::None);
}
// ── parse_md: multiple views ────────────────────────────────────────
#[test]
fn parse_md_round_trips_multiple_views() {
let mut m = two_cat_model();
m.create_view("Alternate");
{
let v = m.views.get_mut("Alternate").unwrap();
v.set_axis("Type", Axis::Column);
v.set_axis("Month", Axis::Row);
}
let text = format_md(&m);
let loaded = parse_md(&text).unwrap();
assert!(loaded.views.contains_key("Default"));
assert!(loaded.views.contains_key("Alternate"));
let alt = loaded.views.get("Alternate").unwrap();
assert_eq!(alt.axis_of("Type"), Axis::Column);
assert_eq!(alt.axis_of("Month"), Axis::Row);
}
// ── export_csv ──────────────────────────────────────────────────────
#[test]
fn export_csv_produces_valid_output() {
let mut m = two_cat_model();
m.model.set_cell(
coord(&[("Type", "Food"), ("Month", "Jan")]),
CellValue::Number(100.0),
);
m.model.set_cell(
coord(&[("Type", "Gas"), ("Month", "Feb")]),
CellValue::Number(50.0),
);
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("export.csv");
super::export_csv(&m, "Default", &path).unwrap();
let content = std::fs::read_to_string(&path).unwrap();
// Should have a header and data rows
let lines: Vec<&str> = content.lines().collect();
assert!(lines.len() >= 2, "Expected header + data, got: {content}");
// Header should contain column labels
assert!(
lines[0].contains(','),
"Expected CSV header, got: {}",
lines[0]
);
}
#[test]
fn export_csv_unknown_view_returns_error() {
let m = two_cat_model();
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("export.csv");
assert!(super::export_csv(&m, "Nonexistent", &path).is_err());
}
// ── Full save/load/format roundtrip with all features ───────────────
/// Sends cell data, a formula and a fully configured view through
/// `format_md` → `parse_md` and checks that each piece is still present.
#[test]
fn full_roundtrip_preserves_all_features() {
    let food_jan = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let gas_feb = || coord(&[("Type", "Gas"), ("Month", "Feb")]);

    let mut workbook = two_cat_model();

    // Cell data: one numeric value and one text value.
    workbook.model.set_cell(food_jan(), CellValue::Number(100.0));
    workbook
        .model
        .set_cell(gas_feb(), CellValue::Text("pending".to_string()));

    // Formula on the `Type` category.
    let formula = parse_formula("Gas = Food * 2", "Type").unwrap();
    workbook.model.add_formula(formula);

    // View configuration: page axis + selection, hidden item, collapsed
    // group and a number format.
    {
        let view = workbook.active_view_mut();
        view.set_axis("Month", Axis::Page);
        view.set_page_selection("Month", "Jan");
        view.hide_item("Type", "Gas");
        view.toggle_group_collapse("Type", "G1");
        view.number_format = ",.0".to_string();
    }

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    // Model-level state.
    assert_eq!(reloaded.model.name, "Budget");
    assert_eq!(
        reloaded.model.get_cell(&food_jan()),
        Some(&CellValue::Number(100.0))
    );
    assert_eq!(
        reloaded.model.get_cell(&gas_feb()),
        Some(&CellValue::Text("pending".to_string()))
    );
    assert!(!reloaded.model.formulas().is_empty());

    // View-level state.
    let view = reloaded.active_view();
    assert_eq!(view.axis_of("Month"), Axis::Page);
    assert_eq!(view.page_selection("Month"), Some("Jan"));
    assert!(view.is_hidden("Type", "Gas"));
    assert!(view.is_group_collapsed("Type", "G1"));
    assert_eq!(view.number_format, ",.0");
}
// ── Stress tests: special characters in values ──────────────────────
/// A text cell containing `", "` must come back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn text_value_with_embedded_comma() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text("Smith, Jr.".into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Comma inside quoted text was corrupted.\n{text}"
    );
}
/// A text cell containing double quotes must come back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn text_value_with_embedded_double_quote() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text(r#"He said "hello""#.into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Embedded double quotes were corrupted.\n{text}"
    );
}
/// A text cell containing `" = "` must come back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn text_value_with_equals_space_sequence() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text("x = y".into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Text containing ' = ' was corrupted.\n{text}"
    );
}
/// A text cell whose entire content is one `"` character must come back
/// unchanged after `format_md` → `parse_md`.
#[test]
fn text_value_is_single_double_quote() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text("\"".into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Single double-quote text was corrupted.\n{text}"
    );
}
/// An empty text cell must still be present, and still empty text, after
/// `format_md` → `parse_md`.
#[test]
fn text_value_is_empty_string() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text(String::new());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Empty string was corrupted.\n{text}"
    );
}
/// A text cell containing a newline must come back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn text_value_with_newline() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text("line1\nline2".into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Newline in text was corrupted.\n{text}"
    );
}
/// Text that happens to look numeric (`"42"`) must be reloaded as
/// `CellValue::Text`, not as `CellValue::Number`.
#[test]
fn text_value_looks_like_number() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text("42".into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Numeric-looking text was converted to Number.\n{text}"
    );
}
/// A text cell starting with `#` must come back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn text_value_with_hash_prefix() {
    let cell = || coord(&[("Type", "Food"), ("Month", "Jan")]);
    let original = CellValue::Text("#NotAHeader".into());

    let mut workbook = two_cat_model();
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Hash-prefixed text was misinterpreted.\n{text}"
    );
}
/// An item literally named `Item [special]` must be reloaded under that
/// exact name.
#[test]
fn item_name_with_brackets_misinterpreted_as_group() {
    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Type", "Item [special]"), ("Month", "Jan")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    let item_names: Vec<&str> = reloaded
        .model
        .category("Type")
        .unwrap()
        .items
        .values()
        .map(|entry| entry.name.as_str())
        .collect();
    assert!(
        item_names.contains(&"Item [special]"),
        "Item name with brackets was misinterpreted as having a group.\n\
         Got items: {item_names:?}\n{text}"
    );
}
/// A category whose name contains `", "` must still be usable as part of a
/// data coordinate after `format_md` → `parse_md`.
#[test]
fn category_name_with_comma_space_in_data() {
    let cell = || coord(&[("Income, Gross", "A"), ("Month", "Jan")]);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Income, Gross", "A"), ("Month", "Jan")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(100.0));

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&CellValue::Number(100.0)),
        "Category name with comma-space broke coord parsing.\n{text}"
    );
}
/// An item whose name contains `=` must still be usable as part of a data
/// coordinate after `format_md` → `parse_md`.
#[test]
fn item_name_with_equals_sign_in_data() {
    let cell = || coord(&[("Type", "A=B"), ("Month", "Jan")]);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Type", "A=B"), ("Month", "Jan")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(50.0));

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&CellValue::Number(50.0)),
        "Item name with '=' broke coord parsing.\n{text}"
    );
}
/// A view named `My View (v2)` must be reloaded under that exact key.
#[test]
fn view_name_with_parentheses() {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Type").unwrap();
    workbook.model.category_mut("Type").unwrap().add_item("A");
    workbook.create_view("My View (v2)");

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    let view_survived = reloaded.views.contains_key("My View (v2)");
    assert!(
        view_survived,
        "View with parens was corrupted.\nViews: {:?}\n{text}",
        reloaded.views.keys().collect::<Vec<_>>()
    );
}
/// Stores several awkward text values in one workbook and checks that each
/// of them is read back unchanged after `format_md` → `parse_md`.
#[test]
fn multiple_tricky_text_cells() {
    // (item in `Dim`, value stored at Dim=<item>, Msr=Val)
    let cases: Vec<(&str, CellValue)> = vec![
        ("A", CellValue::Text("hello, world".into())),
        ("B", CellValue::Text(r#"a "quoted" thing"#.into())),
        ("C", CellValue::Text("x = y = z".into())),
        ("D", CellValue::Text("".into())),
    ];

    let mut workbook = Workbook::new("EdgeCases");
    workbook.add_category("Dim").unwrap();
    for item in ["A", "B", "C", "D"] {
        workbook.model.category_mut("Dim").unwrap().add_item(item);
    }
    workbook.add_category("Msr").unwrap();
    workbook.model.category_mut("Msr").unwrap().add_item("Val");

    for (item, value) in &cases {
        let key = coord(&[("Dim", item), ("Msr", "Val")]);
        workbook.model.set_cell(key, value.clone());
    }

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    for (item, expected) in &cases {
        let key = coord(&[("Dim", item), ("Msr", "Val")]);
        let got = reloaded.model.get_cell(&key);
        assert_eq!(
            got,
            Some(expected),
            "Cell Dim={item} round-trip failed.\nExpected: {expected:?}\nGot: {got:?}\n{text}"
        );
    }
}
}
// ── Property-based parser tests ──────────────────────────────────────────────
#[cfg(test)]
mod parser_prop_tests {
use super::{format_md, parse_md};
use crate::model::cell::{CellKey, CellValue};
use crate::workbook::Workbook;
use proptest::prelude::*;
/// Build a [`CellKey`] from borrowed `(category, item)` pairs by copying
/// each string into an owned `String`.
fn coord(pairs: &[(&str, &str)]) -> CellKey {
    let owned_pairs = pairs
        .iter()
        .map(|&(category, item)| (category.to_owned(), item.to_owned()))
        .collect();
    CellKey::new(owned_pairs)
}
/// Strategy for "safe" identifiers: an ASCII letter followed by up to nine
/// ASCII letters, digits or underscores, so the name won't collide with the
/// format grammar.
fn safe_ident() -> impl Strategy<Value = String> {
    let pattern: &'static str = "[A-Za-z][A-Za-z0-9_]{0,9}";
    pattern
}
/// Strategy producing text values chosen to poke at parser edge cases:
/// regex-generated strings plus a handful of fixed awkward literals.
fn tricky_text() -> impl Strategy<Value = String> {
    prop_oneof![
        // Plain alphanumerics and spaces.
        "[a-zA-Z0-9 ]{0,20}",
        // Words separated by ", ".
        "[a-zA-Z]+(, [a-zA-Z]+)*",
        // Double quotes in various arrangements.
        Just(String::from(r#"say "hi""#)),
        Just(String::from("\"")),
        Just(String::from("\"\"")),
        // " = " sequences.
        Just(String::from("a = b")),
        Just(String::from(" = ")),
        // Leading '#' and bracketed text.
        Just(String::from("# not a header")),
        Just(String::from("[bracketed]")),
        // Any printable ASCII (space through tilde).
        "[ -~]{0,30}",
        // The empty string.
        Just(String::new()),
    ]
}
fn cell_value() -> impl Strategy<Value = CellValue> {
prop_oneof![
prop::num::f64::NORMAL
.prop_filter("finite", |f| f.is_finite())
.prop_map(CellValue::Number),
(-1000i64..1000).prop_map(|n| CellValue::Number(n as f64)),
tricky_text().prop_map(CellValue::Text),
]
}
/// Strategy for a workbook with two categories (`CatA`, `CatB`), one to
/// four items in each, and one to eight cell values.
///
/// Value number `i` is stored at the `i`-th item of each category, wrapping
/// around when a category has fewer items than there are values.
fn arbitrary_model() -> impl Strategy<Value = Workbook> {
    let cat_a_items = prop::collection::hash_set(safe_ident(), 1..=4);
    let cat_b_items = prop::collection::hash_set(safe_ident(), 1..=4);
    let cell_values = prop::collection::vec(cell_value(), 1..=8);

    (safe_ident(), cat_a_items, cat_b_items, cell_values).prop_map(
        |(name, set_a, set_b, values)| {
            let list_a: Vec<String> = set_a.into_iter().collect();
            let list_b: Vec<String> = set_b.into_iter().collect();

            let mut workbook = Workbook::new(&name);
            workbook.add_category("CatA").unwrap();
            workbook.add_category("CatB").unwrap();
            for (category, items) in [("CatA", &list_a), ("CatB", &list_b)] {
                for item in items {
                    workbook.model.category_mut(category).unwrap().add_item(item);
                }
            }

            // Both item lists are non-empty (the sets hold 1..=4 elements),
            // so cycling them pairs value `i` with item `i % len`.
            let wrapped_items = list_a.iter().cycle().zip(list_b.iter().cycle());
            for (value, (a, b)) in values.into_iter().zip(wrapped_items) {
                let key = coord(&[("CatA", a.as_str()), ("CatB", b.as_str())]);
                workbook.model.set_cell(key, value);
            }
            workbook
        },
    )
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(500))]
/// The workbook name survives `format_md` → `parse_md`.
#[test]
fn roundtrip_preserves_model_name(name in safe_ident()) {
    let serialized = format_md(&Workbook::new(&name));
    let reloaded = parse_md(&serialized).unwrap();
    prop_assert_eq!(reloaded.model.name, name);
}
/// The set of item names in each of two categories survives
/// `format_md` → `parse_md`.
#[test]
fn roundtrip_preserves_categories_and_items(
    items1 in prop::collection::hash_set(safe_ident(), 1..=5),
    items2 in prop::collection::hash_set(safe_ident(), 1..=5),
) {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Alpha").unwrap();
    workbook.add_category("Beta").unwrap();
    for (category, items) in [("Alpha", &items1), ("Beta", &items2)] {
        for item in items {
            workbook.model.category_mut(category).unwrap().add_item(item);
        }
    }

    let serialized = format_md(&workbook);
    let reloaded = parse_md(&serialized).unwrap();

    // Collect the reloaded item names of one category into a set.
    let names_in = |category: &str| -> std::collections::HashSet<String> {
        reloaded
            .model
            .category(category)
            .unwrap()
            .items
            .values()
            .map(|entry| entry.name.clone())
            .collect()
    };
    let reloaded_alpha = names_in("Alpha");
    let reloaded_beta = names_in("Beta");

    prop_assert_eq!(reloaded_alpha, items1);
    prop_assert_eq!(reloaded_beta, items2);
}
/// Formatting a workbook, parsing the result and formatting again must
/// reproduce the first text exactly.
#[test]
fn double_roundtrip_is_idempotent(model in arbitrary_model()) {
    let first_pass = format_md(&model);
    let reparsed = parse_md(&first_pass).unwrap();
    let second_pass = format_md(&reparsed);
    prop_assert_eq!(
        &first_pass,
        &second_pass,
        "format→parse→format was not idempotent"
    );
}
/// Any value from `tricky_text()` stored as a text cell is read back
/// unchanged after `format_md` → `parse_md`.
#[test]
fn tricky_text_value_roundtrips(text_val in tricky_text()) {
    let cell = || coord(&[("Dim", "A"), ("Msr", "V")]);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Dim", "A"), ("Msr", "V")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Text(text_val.clone()));

    let serialized = format_md(&workbook);
    let reloaded = parse_md(&serialized).unwrap();
    let actual = reloaded.model.get_cell(&cell());
    prop_assert_eq!(
        actual,
        Some(&CellValue::Text(text_val.clone())),
        "Text value {:?} did not round-trip.\n{}",
        text_val, serialized
    );
}
/// The number of stored cells is the same before and after
/// `format_md` → `parse_md`.
#[test]
fn roundtrip_preserves_cell_count(model in arbitrary_model()) {
    let cells_before = model.model.data.iter_cells().count();

    let serialized = format_md(&model);
    let reloaded = parse_md(&serialized).unwrap();

    let cells_after = reloaded.model.data.iter_cells().count();
    prop_assert_eq!(
        cells_before,
        cells_after,
        "Cell count changed after round-trip"
    );
}
/// Item names containing brackets, backslashes or arbitrary printable
/// ASCII are reloaded under the same name.
#[test]
fn tricky_item_names_roundtrip(
    name in prop_oneof![
        "[a-zA-Z]{1,8}",
        Just("[bracketed]".to_string()),
        Just("a\\b".to_string()),
        Just("x [y]".to_string()),
        Just("\\[escaped\\]".to_string()),
        Just("name[0]".to_string()),
        "[ -~]{1,15}",
    ]
) {
    // Discard names the test does not cover: empty, padded with
    // whitespace, or starting with '#', '>' or '-'.
    prop_assume!(!name.is_empty());
    prop_assume!(name.trim() == name); // no leading/trailing whitespace
    prop_assume!(!name.starts_with('#'));
    prop_assume!(!name.starts_with('>'));
    prop_assume!(!name.starts_with('-'));

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Cat", name.as_str()), ("Dim", "X")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }

    let serialized = format_md(&workbook);
    let reloaded = parse_md(&serialized).unwrap();

    let reloaded_names: Vec<&str> = reloaded
        .model
        .category("Cat")
        .unwrap()
        .items
        .values()
        .map(|entry| entry.name.as_str())
        .collect();
    prop_assert!(
        reloaded_names.contains(&name.as_str()),
        "Item name {:?} did not round-trip.\nGot: {:?}\n{}",
        name, reloaded_names, serialized
    );
}
/// A category name containing `", "`, `=` or a backtick can still address
/// a cell after `format_md` → `parse_md`.
#[test]
fn tricky_category_names_in_data_roundtrip(
    cat_name in prop_oneof![
        "[a-zA-Z]{1,8}",
        Just("Type, Sub".to_string()),
        Just("A=B".to_string()),
        Just("Cat`Name".to_string()),
        Just("Income, Gross".to_string()),
    ]
) {
    prop_assume!(!cat_name.is_empty());
    prop_assume!(!cat_name.starts_with('#'));

    let category = cat_name.as_str();
    let cell = || coord(&[(category, "X"), ("Other", "Y")]);

    let mut workbook = Workbook::new("Test");
    for (cat, item) in [(category, "X"), ("Other", "Y")] {
        workbook.add_category(cat).unwrap();
        workbook.model.category_mut(cat).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(42.0));

    let serialized = format_md(&workbook);
    let reloaded = parse_md(&serialized).unwrap();
    let actual = reloaded.model.get_cell(&cell());
    prop_assert_eq!(
        actual,
        Some(&CellValue::Number(42.0)),
        "Category name {:?} broke data round-trip.\n{}",
        cat_name, serialized
    );
}
/// An item name containing `=`, `", "` or a backtick can still address a
/// cell after `format_md` → `parse_md`.
#[test]
fn tricky_item_names_in_data_roundtrip(
    item_name in prop_oneof![
        "[a-zA-Z]{1,8}",
        Just("A=B".to_string()),
        Just("X, Y".to_string()),
        Just("Item`s".to_string()),
    ]
) {
    prop_assume!(!item_name.is_empty());
    prop_assume!(!item_name.starts_with('#'));
    prop_assume!(!item_name.starts_with('-'));
    prop_assume!(!item_name.starts_with('>'));

    let item = item_name.as_str();
    let cell = || coord(&[("Cat", item), ("Dim", "V")]);

    let mut workbook = Workbook::new("Test");
    for (category, member) in [("Cat", item), ("Dim", "V")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(member);
    }
    workbook.model.set_cell(cell(), CellValue::Number(99.0));

    let serialized = format_md(&workbook);
    let reloaded = parse_md(&serialized).unwrap();
    let actual = reloaded.model.get_cell(&cell());
    prop_assert_eq!(
        actual,
        Some(&CellValue::Number(99.0)),
        "Item name {:?} broke data round-trip.\n{}",
        item_name, serialized
    );
}
}
}
// ── Additional parser edge-case tests ────────────────────────────────────────
#[cfg(test)]
mod parser_edge_cases {
use super::{format_md, parse_md};
use crate::model::category::Group;
use crate::model::cell::{CellKey, CellValue};
use crate::workbook::Workbook;
/// Turn borrowed `(category, item)` pairs into an owned [`CellKey`].
fn coord(pairs: &[(&str, &str)]) -> CellKey {
    let to_owned_pair = |pair: &(&str, &str)| (pair.0.to_string(), pair.1.to_string());
    CellKey::new(pairs.iter().map(to_owned_pair).collect())
}
// ── Backtick quoting in coordinates ─────────────────────────────────
/// A category name containing a backtick can still address a cell after
/// `format_md` → `parse_md`.
#[test]
fn backtick_in_category_name() {
    let cell = || coord(&[("Cat`s", "A"), ("Dim", "X")]);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Cat`s", "A"), ("Dim", "X")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(1.0));

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&CellValue::Number(1.0)),
        "Backtick in category name broke round-trip.\n{text}"
    );
}
/// An item name containing both `=` and `", "` can still address a cell
/// after `format_md` → `parse_md`.
#[test]
fn item_name_with_both_equals_and_comma() {
    let cell = || coord(&[("Cat", "a=1, b=2"), ("Dim", "X")]);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("Cat", "a=1, b=2"), ("Dim", "X")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(7.0));

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&CellValue::Number(7.0)),
        "Item with '=' and ', ' broke round-trip.\n{text}"
    );
}
// ── View section edge cases ─────────────────────────────────────────
/// A hidden item whose name contains `/` must still be reported as hidden
/// after `format_md` → `parse_md`.
#[test]
fn hidden_item_with_slash_in_name() {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Type").unwrap();
    workbook.model.category_mut("Type").unwrap().add_item("A/B");
    workbook.active_view_mut().hide_item("Type", "A/B");

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    // NOTE(review): the original author expected this could fail if the
    // parser splits hidden entries on '/' — confirm against the grammar.
    let still_hidden = reloaded.active_view().is_hidden("Type", "A/B");
    assert!(
        still_hidden,
        "Hidden item with '/' in name was corrupted.\n{text}"
    );
}
/// A collapsed group whose name contains `/` must still be reported as
/// collapsed after `format_md` → `parse_md`.
#[test]
fn collapsed_group_with_slash_in_name() {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Type").unwrap();
    workbook
        .active_view_mut()
        .toggle_group_collapse("Type", "Q1/Q2");

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    let still_collapsed = reloaded.active_view().is_group_collapsed("Type", "Q1/Q2");
    assert!(
        still_collapsed,
        "Collapsed group with '/' in name was corrupted.\n{text}"
    );
}
/// A view literally named `Not (active)` must be reloaded under that exact
/// key, even though its name ends with the text `(active)`.
#[test]
fn view_name_ending_with_active_string() {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Type").unwrap();
    workbook.model.category_mut("Type").unwrap().add_item("A");
    workbook.create_view("Not (active)");

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    let view_survived = reloaded.views.contains_key("Not (active)");
    assert!(
        view_survived,
        "View name ending with '(active)' was misinterpreted.\nViews: {:?}\n{text}",
        reloaded.views.keys().collect::<Vec<_>>()
    );
}
// ── Group hierarchy edge cases ──────────────────────────────────────
/// An item placed in a group named `Q1 [2025]` must be reloaded with that
/// same group name.
#[test]
fn group_name_with_brackets() {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Month").unwrap();
    {
        let month = workbook.model.category_mut("Month").unwrap();
        month.add_item_in_group("Jan", "Q1 [2025]");
    }
    {
        let month = workbook.model.category_mut("Month").unwrap();
        month.add_group(Group::new("Q1 [2025]").with_parent("Year"));
    }

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    let month = reloaded.model.category("Month").unwrap();
    let jan = month.items.values().find(|entry| entry.name == "Jan");
    assert!(jan.is_some(), "Jan item missing after round-trip.\n{text}");
    assert_eq!(
        jan.unwrap().group.as_deref(),
        Some("Q1 [2025]"),
        "Group name with brackets was corrupted.\n{text}"
    );
}
/// An item named `Data [raw]` that belongs to group `Input` must keep both
/// its bracketed name and its group after `format_md` → `parse_md`.
#[test]
fn item_in_group_where_item_has_brackets() {
    let mut workbook = Workbook::new("Test");
    workbook.add_category("Type").unwrap();
    workbook
        .model
        .category_mut("Type")
        .unwrap()
        .add_item_in_group("Data [raw]", "Input");

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();

    let cat = reloaded.model.category("Type").unwrap();
    let found = cat.items.values().find(|entry| entry.name == "Data [raw]");
    assert!(
        found.is_some(),
        "Item 'Data [raw]' with group not found.\nItems: {:?}\n{text}",
        cat.items
            .values()
            .map(|i| (&i.name, &i.group))
            .collect::<Vec<_>>()
    );
    assert_eq!(found.unwrap().group.as_deref(), Some("Input"));
}
// ── Malformed input resilience ──────────────────────────────────────
/// Parsing an empty input must either fail or yield a workbook whose model
/// name is empty.
#[test]
fn parse_empty_string() {
    let acceptable = match parse_md("") {
        Err(_) => true,
        Ok(workbook) => workbook.model.name.is_empty(),
    };
    assert!(
        acceptable,
        "Empty input should either error or produce empty model"
    );
}
/// A file consisting of only the version line and the title line parses,
/// and the title becomes the model name.
#[test]
fn parse_just_model_name() {
    let input = "v2025-04-09\n# MyModel\n";
    let workbook = parse_md(input).unwrap();
    assert_eq!(workbook.model.name, "MyModel");
}
/// A data line lacking the `" = "` separator is rejected by the parser.
#[test]
fn parse_data_without_value() {
    let malformed = "v2025-04-09\n# Test\n## Data\nType=Food\n";
    let outcome = parse_md(malformed);
    assert!(outcome.is_err());
}
/// A data line that has a value but no coordinates is rejected by the
/// parser.
#[test]
fn parse_data_with_empty_coords() {
    let malformed = "v2025-04-09\n# Test\n## Data\n = 42\n";
    let outcome = parse_md(malformed);
    assert!(outcome.is_err());
}
/// Declaring the same category twice still parses, and the resulting
/// category holds at least one item.
#[test]
fn parse_duplicate_categories() {
    let input = "v2025-04-09\n# Test\n## Category: Type\n- A\n## Category: Type\n- B\n";
    let workbook = parse_md(input).unwrap();

    let names: Vec<&str> = workbook
        .model
        .category("Type")
        .unwrap()
        .items
        .values()
        .map(|entry| entry.name.as_str())
        .collect();
    assert!(!names.is_empty());
}
/// A category section without items yields an empty category and does not
/// disturb the category that follows it.
#[test]
fn parse_category_with_no_items() {
    let input = "v2025-04-09\n# Test\n## Category: Empty\n## Category: Full\n- A\n";
    let workbook = parse_md(input).unwrap();
    let model = &workbook.model;

    assert!(model.category("Empty").is_some());
    assert_eq!(model.category("Empty").unwrap().items.len(), 0);
    assert_eq!(model.category("Full").unwrap().items.len(), 1);
}
// ── Number formatting edge cases ────────────────────────────────────
/// `-0.0` is reloaded as a number equal to zero; the sign of the zero is
/// not checked.
#[test]
fn number_negative_zero_roundtrips() {
    let cell = || coord(&[("A", "X"), ("B", "Y")]);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("A", "X"), ("B", "Y")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(-0.0));

    let serialized = format_md(&workbook);
    let reloaded = parse_md(&serialized).unwrap();

    match reloaded.model.get_cell(&cell()) {
        Some(CellValue::Number(n)) => assert!(n.abs() == 0.0),
        other => panic!("Expected Number(0.0), got {other:?}"),
    }
}
/// The largest finite `f64` is reloaded bit-for-bit equal.
#[test]
fn number_very_large_roundtrips() {
    let cell = || coord(&[("A", "X"), ("B", "Y")]);
    let huge = CellValue::Number(1.7976931348623157e308);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("A", "X"), ("B", "Y")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), huge.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    let got = reloaded.model.get_cell(&cell());
    assert_eq!(got, Some(&huge), "f64::MAX did not round-trip.\n{text}");
}
/// A subnormal `f64` is reloaded equal to the stored value.
#[test]
fn number_very_small_roundtrips() {
    let cell = || coord(&[("A", "X"), ("B", "Y")]);
    // 5e-324 is the smallest positive subnormal f64; it lies below
    // f64::MIN_POSITIVE, which is the smallest positive *normal* value.
    let tiny = CellValue::Number(5e-324);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("A", "X"), ("B", "Y")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), tiny.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    let got = reloaded.model.get_cell(&cell());
    assert_eq!(
        got,
        Some(&tiny),
        "Subnormal float did not round-trip.\n{text}"
    );
}
/// `std::f64::consts::PI` is reloaded equal to the stored value.
#[test]
fn number_pi_roundtrips() {
    let cell = || coord(&[("A", "X"), ("B", "Y")]);
    let pi = CellValue::Number(std::f64::consts::PI);

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("A", "X"), ("B", "Y")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), pi.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    let got = reloaded.model.get_cell(&cell());
    assert_eq!(got, Some(&pi), "PI did not round-trip.\n{text}");
}
// ── Whitespace edge cases ───────────────────────────────────────────
/// Surrounding spaces on the title line do not end up in the model name.
#[test]
fn model_name_with_leading_trailing_spaces() {
    let input = "v2025-04-09\n# Spaced Model \n";
    let workbook = parse_md(input).unwrap();
    // The grammar's rest_of_line takes everything after "# "; the builder
    // is what trims it.
    assert_eq!(workbook.model.name, "Spaced Model");
}
/// Trailing spaces on a category header do not end up in the category
/// name.
#[test]
fn category_name_with_trailing_spaces() {
    let input = "v2025-04-09\n# Test\n## Category: Trailing \n- Item\n";
    let workbook = parse_md(input).unwrap();
    // rest_of_line keeps the trailing spaces; the builder trims them.
    let trimmed_lookup = workbook.model.category("Trailing");
    assert!(trimmed_lookup.is_some());
}
/// A data line padded with extra whitespace is rejected: the pest grammar
/// is strict about spacing.
#[test]
fn data_line_with_extra_whitespace() {
    let padded = "v2025-04-09\n# Test\n## Category: T\n- A\n## Category: M\n- J\n## Data\n T=A , M=J = 42 \n";
    let outcome = parse_md(padded);
    assert!(outcome.is_err());
}
// ── Three-category model ────────────────────────────────────────────
/// A cell addressed by three categories is read back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn three_categories_round_trip() {
    let cell = || coord(&[("Region", "East"), ("Product", "Widget"), ("Year", "2025")]);

    let mut workbook = Workbook::new("3D");
    for category in ["Region", "Product", "Year"] {
        workbook.add_category(category).unwrap();
    }
    let memberships = [
        ("Region", "East"),
        ("Region", "West"),
        ("Product", "Widget"),
        ("Year", "2025"),
    ];
    for (category, item) in memberships {
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), CellValue::Number(1000.0));

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&CellValue::Number(1000.0)),
        "3-category cell did not round-trip.\n{text}"
    );
}
// ── Text value with backslash ───────────────────────────────────────
/// A text cell containing backslashes is read back unchanged after
/// `format_md` → `parse_md`.
#[test]
fn text_value_with_backslash() {
    let cell = || coord(&[("A", "X"), ("B", "Y")]);
    let original = CellValue::Text("C:\\Users\\file.txt".into());

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("A", "X"), ("B", "Y")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Backslash in text was corrupted.\n{text}"
    );
}
/// The two-character sequence backslash + `n` inside a text cell must stay
/// two characters; it must not be reloaded as a newline.
#[test]
fn text_value_with_backslash_n_literal() {
    let cell = || coord(&[("A", "X"), ("B", "Y")]);
    let original = CellValue::Text("literal \\n not newline".into());

    let mut workbook = Workbook::new("Test");
    for (category, item) in [("A", "X"), ("B", "Y")] {
        workbook.add_category(category).unwrap();
        workbook.model.category_mut(category).unwrap().add_item(item);
    }
    workbook.model.set_cell(cell(), original.clone());

    let text = format_md(&workbook);
    let reloaded = parse_md(&text).unwrap();
    assert_eq!(
        reloaded.model.get_cell(&cell()),
        Some(&original),
        "Literal backslash-n was corrupted.\n{text}"
    );
}
}
// ── Grammar-walking file generator ───────────────────────────────────────────
//
// Parses `improv.pest` at test time and walks the AST to generate random valid
// files. The generator and parser share a single source of truth: the grammar.
#[cfg(test)]
mod generator {
use pest_meta::ast::{Expr, RuleType};
use pest_meta::parser;
use proptest::prelude::*;
use std::collections::HashMap;
/// Parse the embedded `improv.pest` grammar and index its rules by name.
///
/// # Panics
///
/// Panics if the grammar text fails to parse or its rules cannot be
/// consumed into an AST.
fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
    let source = include_str!("improv.pest");
    let pairs = match parser::parse(parser::Rule::grammar_rules, source) {
        Ok(pairs) => pairs,
        Err(e) => panic!("Bad grammar: {e}"),
    };
    let rules = match parser::consume_rules(pairs) {
        Ok(rules) => rules,
        Err(e) => panic!("{e:?}"),
    };

    let mut by_name = HashMap::new();
    for rule in rules {
        by_name.insert(rule.name, (rule.ty, rule.expr));
    }
    by_name
}
/// Recursive string generator driven by a pest `Expr`.
///
/// `choices` is consumed left-to-right for every decision point (Choice,
/// Opt, Rep). If it runs out we pick the "smallest" alternative (first
/// branch, no repetition, skip optional).
struct Gen<'g> {
    /// Grammar rules keyed by rule name, borrowed for the generator's lifetime.
    rules: &'g HashMap<String, (RuleType, Expr)>,
    /// Entropy bytes; each decision point reads one byte via `pick`.
    choices: Vec<u8>,
    /// Index into `choices` of the next byte `pick` will read.
    pos: usize,
}
impl<'g> Gen<'g> {
fn new(rules: &'g HashMap<String, (RuleType, Expr)>, choices: Vec<u8>) -> Self {
Self {
rules,
choices,
pos: 0,
}
}
/// Read the next entropy byte and advance the cursor.
///
/// Returns 0 once `choices` is exhausted; the cursor still advances.
fn pick(&mut self) -> u8 {
    let index = self.pos;
    self.pos += 1;
    match self.choices.get(index) {
        Some(&byte) => byte,
        None => 0,
    }
}
/// Append to `out` one string derived from `expr`, using `pick` for every
/// decision.
///
/// Lookahead predicates and any expression kind not listed below emit
/// nothing; an identifier that names neither a handled built-in nor a
/// known rule also emits nothing.
fn emit(&mut self, expr: &Expr, out: &mut String) {
    match expr {
        // Literal text is copied verbatim.
        Expr::Str(literal) => out.push_str(literal),

        // Character range: one entropy byte selects a code point inside it.
        Expr::Range(start, end) => {
            let first = start.chars().next().unwrap() as u32;
            let last = end.chars().next().unwrap() as u32;
            let span = last - first + 1;
            let offset = self.pick() as u32 % span;
            out.push(char::from_u32(first + offset).unwrap());
        }

        // Built-in pest rules first, then user-defined rules by name.
        Expr::Ident(name) => match name.as_str() {
            // ANY is narrowed to a lowercase ASCII letter.
            "ANY" => {
                let letter = (b'a' + self.pick() % 26) as char;
                out.push(letter);
            }
            "NEWLINE" => out.push('\n'),
            "SOI" | "EOI" => {}
            "ASCII_DIGIT" => {
                let digit = (b'0' + self.pick() % 10) as char;
                out.push(digit);
            }
            _ => {
                if let Some((_, body)) = self.rules.get(name) {
                    self.emit(body, out);
                }
            }
        },

        // Sequence: left operand, then right operand.
        Expr::Seq(left, right) => {
            self.emit(left, out);
            self.emit(right, out);
        }

        // Choice: flatten the right-nested chain, then pick one branch.
        Expr::Choice(head, tail) => {
            let mut alternatives: Vec<&Expr> = vec![head.as_ref()];
            let mut rest: &Expr = tail.as_ref();
            loop {
                match rest {
                    Expr::Choice(left, right) => {
                        alternatives.push(left.as_ref());
                        rest = right.as_ref();
                    }
                    last => {
                        alternatives.push(last);
                        break;
                    }
                }
            }
            let chosen = alternatives[self.pick() as usize % alternatives.len()];
            self.emit(chosen, out);
        }

        // Optional: skipped when the entropy byte is a multiple of 3
        // (so roughly two thirds of byte values emit it).
        Expr::Opt(inner) => {
            let skip = self.pick().is_multiple_of(3);
            if !skip {
                self.emit(inner, out);
            }
        }

        // Zero-or-more: between 0 and 3 repetitions.
        Expr::Rep(inner) => {
            let times = self.pick() % 4;
            for _ in 0..times {
                self.emit(inner, out);
            }
        }

        // One-or-more: between 1 and 3 repetitions.
        Expr::RepOnce(inner) => {
            let times = 1 + self.pick() % 3;
            for _ in 0..times {
                self.emit(inner, out);
            }
        }

        // Lookaheads consume no input, so they produce no output.
        Expr::NegPred(_) | Expr::PosPred(_) => {}

        // Every other expression kind is unsupported and skipped.
        _ => {}
    }
}
/// Generate a string for the grammar rule called `rule_name`.
///
/// Returns an empty string when no rule with that name exists.
fn generate(&mut self, rule_name: &str) -> String {
    let mut out = String::new();
    // `self.rules` is a `&'g` reference, so the looked-up expression borrows
    // from the grammar map rather than from `self`; no clone of the rule's
    // AST is needed before calling `emit` (which does the same lookup).
    if let Some((_ty, expr)) = self.rules.get(rule_name) {
        self.emit(expr, &mut out);
    }
    out
}
}
/// Proptest strategy: generate a valid `.improv` file by walking the grammar.
///
/// The grammar is parsed once, when the strategy is built, and then shared
/// by every generated case. Each case draws 64 to 256 random bytes that
/// serve as the entropy for the walk starting at the `file` rule.
///
/// # Panics
///
/// Panics if `load_grammar` cannot parse the embedded grammar.
pub fn improv_file() -> impl Strategy<Value = String> {
    // Parsing the grammar does not depend on the random input, so do it
    // outside the per-case closure.
    let rules = load_grammar();
    prop::collection::vec(any::<u8>(), 64..=256)
        .prop_map(move |choices| Gen::new(&rules, choices).generate("file"))
}
}
#[cfg(test)]
mod grammar_prop_tests {
use super::{format_md, generator, parse_md};
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(500))]
/// parse(generate()) — every file produced by the grammar walk must be
/// accepted by `parse_md`.
#[test]
fn generated_file_parses(file in generator::improv_file()) {
    if let Err(err) = parse_md(&file) {
        prop_assert!(
            false,
            "Generated file failed to parse:\n{}\nError: {}",
            file, err
        );
    }
}
/// parse(print(parse(generate()))) — re-formatting a parsed file yields
/// text that parses again, keeps the model name, category count and cell
/// count, and formats identically a second time.
#[test]
fn generated_file_roundtrips(file in generator::improv_file()) {
    // Inputs that fail to parse are discarded here (the grammar walk may
    // produce degenerate inputs such as empty model names).
    let result1 = parse_md(&file);
    prop_assume!(result1.is_ok());
    let first = result1.unwrap();

    let printed = format_md(&first);
    let model2_result = parse_md(&printed);
    prop_assert!(
        model2_result.is_ok(),
        "Re-formatted file failed to parse:\n{}\nError: {}",
        printed, model2_result.unwrap_err()
    );
    let second = model2_result.unwrap();

    // Model name.
    prop_assert_eq!(&first.model.name, &second.model.name);

    // Number of categories.
    let categories_before = first.model.categories.len();
    let categories_after = second.model.categories.len();
    prop_assert_eq!(categories_before, categories_after, "Category count changed");

    // Number of cells.
    let cells_before = first.model.data.iter_cells().count();
    let cells_after = second.model.data.iter_cells().count();
    prop_assert_eq!(
        cells_before,
        cells_after,
        "Cell count changed: {} → {}\nOriginal:\n{}\nRe-formatted:\n{}",
        cells_before, cells_after, file, printed
    );

    // Formatting the re-parsed model must reproduce `printed` exactly.
    let printed_again = format_md(&second);
    prop_assert_eq!(
        &printed,
        &printed_again,
        "format→parse→format not idempotent"
    );
}
}
}