From d34e8eb3132e28dce1346b84c7e0717ad31d8d60 Mon Sep 17 00:00:00 2001
From: Edward Langley <el-github@elangley.org>
Date: Thu, 9 Apr 2026 02:53:13 -0700
Subject: [PATCH] feat: replace ad-hoc .improv parser with pest grammar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add improv.pest PEG grammar as the single source of truth for the
  .improv file format (v2025-04-09)
- Replace hand-written line scanner with pest-derived parser that walks
  the grammar's parse tree
- Add grammar-walking test generator that reads improv.pest at test time
  via pest_meta and produces random valid files from the AST
- Fix 6 parser bugs: newlines in text, commas in names, brackets in
  names, float precision, view name ambiguity, group brackets
- New format: version line, Initial View header, pipe quoting (|...|),
  Views→Formulas→Categories→Data section order, comma-separated items
- Bare names restricted to [A-Za-z_][A-Za-z0-9_-]*, everything else
  pipe-quoted with \| \\ \n escapes
- Remove all unwrap() calls from production code, propagate errors
  with Result throughout parse_md
- Extract shared escape_pipe/unescape_pipe/pipe_quote helpers, deduplicate
  hidden/collapsed formatting, add w!() macro for infallible writeln

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Executed-By: spot
---
 src/persistence/improv.pest |  124 ++++
 src/persistence/mod.rs      | 1064 +++++++++++++++++++----------------
 2 files changed, 714 insertions(+), 474 deletions(-)
 create mode 100644 src/persistence/improv.pest

diff --git a/src/persistence/improv.pest b/src/persistence/improv.pest
new file mode 100644
index 0000000..2782ea9
--- /dev/null
+++ b/src/persistence/improv.pest
@@ -0,0 +1,124 @@
+// ── .improv file grammar (v2025-04-09) ───────────────────────────────────────
+//
+// Line-oriented, markdown-flavoured format for multi-dimensional models.
+// Header lines (version, `# name`, optional Initial View) come first; sections follow in any order.
+//
+// Names: bare identifiers ([A-Za-z_][A-Za-z0-9_-]*) or pipe-quoted |like this|.
+//   Inside pipes, backslash escapes: \| for literal pipe, \\ for backslash,
+//   \n for newline.
+// Values: bare numbers, pipe-quoted |text|, or any other bare text up to end of line (e.g. inf, nan).
+
+file = {
+    SOI ~
+    blank_lines ~
+    version_line ~
+    model_name ~
+    initial_view? ~
+    section* ~
+    EOI // every line rule consumes a NEWLINE, so the final line must be newline-terminated
+}
+
+version_line  = { "v" ~ rest_of_line ~ NEWLINE ~ blank_lines }
+model_name    = { "# " ~ rest_of_line ~ NEWLINE ~ blank_lines }
+initial_view  = { "Initial View: " ~ rest_of_line ~ NEWLINE ~ blank_lines }
+
+section = _{
+    category_section
+  | formulas_section
+  | data_section
+  | view_section
+}
+
+// ── Category ─────────────────────────────────────────────────────────────────
+
+category_section = {
+    "## Category: " ~ rest_of_line ~ NEWLINE ~ blank_lines ~
+    category_entry*
+}
+
+category_entry = _{ group_hierarchy | grouped_item | item_list }
+
+// Comma-separated ungrouped items (names may be bare or pipe-quoted): `- Food, Gas, Total`
+item_list = {
+    "- " ~ name ~ ("," ~ " "* ~ name)* ~ NEWLINE ~ blank_lines
+}
+
+// Single item with group bracket: `- Jan[Q1]`
+grouped_item = {
+    "- " ~ name ~ "[" ~ name ~ "]" ~ NEWLINE ~ blank_lines
+}
+
+group_hierarchy = {
+    "> " ~ name ~ "[" ~ name ~ "]" ~ NEWLINE ~ blank_lines
+}
+
+// ── Formulas ─────────────────────────────────────────────────────────────────
+
+formulas_section = {
+    "## Formulas" ~ NEWLINE ~ blank_lines ~
+    formula_line*
+}
+
+formula_line = {
+    "- " ~ rest_of_line ~ NEWLINE ~ blank_lines
+}
+
+// ── Data ─────────────────────────────────────────────────────────────────────
+
+data_section = {
+    "## Data" ~ NEWLINE ~ blank_lines ~
+    data_line*
+}
+
+data_line = {
+    coord_list ~ " = " ~ cell_value ~ NEWLINE ~ blank_lines
+}
+
+coord_list = { coord ~ (", " ~ coord)* }
+coord      = { name ~ "=" ~ name }
+
+cell_value = _{ number | pipe_quoted | bare_value }
+
+number = @{
+    "-"? ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
+}
+
+bare_value = @{ (!NEWLINE ~ ANY)+ }
+
+// ── View ─────────────────────────────────────────────────────────────────────
+
+view_section = {
+    "## View: " ~ rest_of_line ~ NEWLINE ~ blank_lines ~
+    view_entry*
+}
+
+view_entry = _{ format_line | hidden_line | collapsed_line | axis_line } // ordered: `format: …` is never an axis_line
+
+axis_line = {
+    name ~ ": " ~ axis_kind ~ (", " ~ name)? ~ NEWLINE ~ blank_lines
+}
+
+axis_kind      = @{ "row" | "column" | "page" | "none" }
+
+format_line    = { "format: " ~ rest_of_line ~ NEWLINE ~ blank_lines }
+hidden_line    = { "hidden: " ~ name ~ "/" ~ name ~ NEWLINE ~ blank_lines }
+collapsed_line = { "collapsed: " ~ name ~ "/" ~ name ~ NEWLINE ~ blank_lines }
+
+// ── Names ────────────────────────────────────────────────────────────────────
+//
+// A name is either pipe-quoted or a bare identifier.
+// Pipe-quoted: |Income, Gross| — backslash escapes inside:
+//   \| = literal pipe, \\ = literal backslash, \n = newline
+// Bare: ASCII letter or `_` first, then ASCII letters, digits, `_` or `-`; any other name is quoted.
+
+name = _{ pipe_quoted | bare_name }
+
+pipe_quoted = { "|" ~ pipe_inner ~ "|" }
+pipe_inner  = @{ ("\\" ~ ANY | !"|" ~ ANY)* }
+
+bare_name   = @{ ('A'..'Z' | 'a'..'z' | "_") ~ ('A'..'Z' | 'a'..'z' | '0'..'9' | "_" | "-")* }
+
+// ── Shared ───────────────────────────────────────────────────────────────────
+
+rest_of_line = @{ (!NEWLINE ~ ANY)* }
+blank_lines  = _{ NEWLINE* }
diff --git a/src/persistence/mod.rs b/src/persistence/mod.rs
index dc3b2a4..9add417 100644
--- a/src/persistence/mod.rs
+++ b/src/persistence/mod.rs
@@ -2,6 +2,8 @@ use anyhow::{Context, Result};
 use flate2::read::GzDecoder;
 use flate2::write::GzEncoder;
 use flate2::Compression;
+use pest::Parser;
+use pest_derive::Parser;
 use std::io::{BufReader, BufWriter, Read, Write};
 use std::path::Path;
 
@@ -11,15 +13,113 @@ use crate::model::cell::{CellKey, CellValue};
 use crate::model::Model;
 use crate::view::{Axis, GridLayout};
 
+#[derive(Parser)] // derives the parser and the `Rule` enum that `parse_md` matches on
+#[grammar = "persistence/improv.pest"] // the grammar file is src/persistence/improv.pest
+struct ImprovParser;
+
+// ── Pipe quoting (shared between format and parse) ───────────────────────────
+
+/// Returns `true` when `name` matches the grammar's `bare_name`: `[A-Za-z_][A-Za-z0-9_-]*`.
+fn is_bare_name(name: &str) -> bool {
+    let mut chars = name.chars();
+    match chars.next() {
+        Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
+        _ => return false, // empty, or the first char is not an ASCII letter or `_`
+    }
+    chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
+}
+
+/// Escape `s` for use between pipe delimiters: `|` → `\|`, `\` → `\\`, newline → `\n`.
+fn escape_pipe(s: &str) -> String {
+    let mut out = String::with_capacity(s.len()); // lower bound: each escape adds one byte
+    for c in s.chars() {
+        match c {
+            '|' => out.push_str("\\|"),
+            '\\' => out.push_str("\\\\"),
+            '\n' => out.push_str("\\n"),
+            c => out.push(c), // every other char is copied through unchanged
+        }
+    }
+    out
+}
+
+/// Unescape a pipe-quoted body: `\|` → `|`, `\\` → `\`, `\n` → newline; other escapes stay as written.
+fn unescape_pipe(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut chars = s.chars();
+    while let Some(c) = chars.next() {
+        if c == '\\' {
+            match chars.next() { // consume the char after the backslash
+                Some('|') => out.push('|'),
+                Some('\\') => out.push('\\'),
+                Some('n') => out.push('\n'),
+                Some(other) => {
+                    out.push('\\'); // unknown escape: keep both the backslash and the char
+                    out.push(other);
+                }
+                None => out.push('\\'), // lone trailing backslash is kept literally
+            }
+        } else {
+            out.push(c);
+        }
+    }
+    out
+}
+
+/// CL-style `|...|` pipe quoting unless the name is a valid bare identifier.
+/// `format` is always quoted: a bare `format: row` view line parses as the `format:` line.
+fn quote_name(name: &str) -> String {
+    if is_bare_name(name) && name != "format" {
+        return name.to_string();
+    }
+    format!("|{}|", escape_pipe(name))
+}
+
+/// Pipe-quote unconditionally, even if `s` is a bare identifier; format_md uses this for
+/// text and error cell values so they cannot be read back as numbers.
+fn pipe_quote(s: &str) -> String {
+    format!("|{}|", escape_pipe(s))
+}
+
+// ── Number formatting ────────────────────────────────────────────────────────
+
+fn format_number(n: f64) -> String {
+    if n.is_infinite() { // non-finite values get word tokens that parse_md maps back to f64
+        return if n.is_sign_positive() {
+            "inf".to_string()
+        } else {
+            "-inf".to_string()
+        };
+    }
+    if n.is_nan() {
+        return "nan".to_string();
+    }
+    if n.fract() == 0.0 && n.abs() < 1e15 { // integral and well inside i64 range
+        format!("{}", n as i64) // prints without a decimal point
+    } else {
+        let display = format!("{n}");
+        if display.parse::<f64>() == Ok(n) { // keep Display only if it parses back to `n`
+            display
+        } else {
+            format!("{n:?}") // otherwise fall back to Debug formatting
+        }
+    }
+}
+
+// ── File I/O ─────────────────────────────────────────────────────────────────
+
+fn is_gzip(path: &Path) -> bool {
+    path.to_str().is_some_and(|s| s.ends_with(".gz")) // suffix test on the path text; non-UTF-8 paths yield false
+}
+
 pub fn save(model: &Model, path: &Path) -> Result<()> {
     let text = format_md(model);
-
-    if path.to_str().map(|s| s.ends_with(".gz")).unwrap_or(false) {
+    if is_gzip(path) {
         let file = std::fs::File::create(path)
             .with_context(|| format!("Cannot create {}", path.display()))?;
-        let mut encoder = GzEncoder::new(BufWriter::new(file), Compression::default());
-        encoder.write_all(text.as_bytes())?;
-        encoder.finish()?;
+        let mut enc = GzEncoder::new(BufWriter::new(file), Compression::default());
+        enc.write_all(text.as_bytes())?;
+        enc.finish()?;
     } else {
         std::fs::write(path, &text).with_context(|| format!("Cannot write {}", path.display()))?;
     }
@@ -29,18 +129,15 @@ pub fn save(model: &Model, path: &Path) -> Result<()> {
 pub fn load(path: &Path) -> Result<Model> {
     let file =
         std::fs::File::open(path).with_context(|| format!("Cannot open {}", path.display()))?;
-
-    let text = if path.to_str().map(|s| s.ends_with(".gz")).unwrap_or(false) {
-        let mut decoder = GzDecoder::new(BufReader::new(file));
+    let text = if is_gzip(path) {
         let mut s = String::new();
-        decoder.read_to_string(&mut s)?;
+        GzDecoder::new(BufReader::new(file)).read_to_string(&mut s)?;
         s
     } else {
         let mut s = String::new();
         BufReader::new(file).read_to_string(&mut s)?;
         s
     };
-
     if text.trim_start().starts_with('{') {
         serde_json::from_str(&text).context("Failed to deserialize model")
     } else {
@@ -56,149 +153,97 @@ pub fn autosave_path(path: &Path) -> std::path::PathBuf {
 }
 
 
-/// Format a number with enough precision for lossless round-trip.
-fn format_number(n: f64) -> String {
-    if n.fract() == 0.0 && n.abs() < 1e15 {
-        format!("{}", n as i64)
-    } else {
-        // Use enough decimal digits to round-trip any f64.
-        // Rust's {:?} (Debug) uses full precision, but looks odd.
-        // Instead, try the default Display first; if it round-trips, use it.
-        let display = format!("{n}");
-        if display.parse::<f64>() == Ok(n) {
-            display
-        } else {
-            // Fall back to repr-style full precision
-            format!("{n:?}")
-        }
-    }
-}
-
-/// Characters that require pipe-quoting in a name.
-const NAME_SPECIAL: &[char] = &['=', ',', '|', '[', ']', '/', ':', '#'];
-
-/// Format a name using CL-style `|...|` pipe quoting if it contains special
-/// characters. Inside a quoted name, `\|` is a literal pipe and `\\` is a
-/// literal backslash.
-fn quote_name(name: &str) -> String {
-    if name.is_empty() || name.chars().any(|c| NAME_SPECIAL.contains(&c)) || name != name.trim() {
-        let mut out = String::with_capacity(name.len() + 2);
-        out.push('|');
-        for c in name.chars() {
-            match c {
-                '|' => out.push_str("\\|"),
-                '\\' => out.push_str("\\\\"),
-                c => out.push(c),
-            }
-        }
-        out.push('|');
-        out
-    } else {
-        name.to_string()
-    }
-}
-
-
 /// Serialize a model to the markdown `.improv` format.
 pub fn format_md(model: &Model) -> String {
-    use std::fmt::Write;
+    // writeln! to a String is infallible; this macro avoids .unwrap() noise.
+    macro_rules! w {
+        ($dst:expr, $($arg:tt)*) => { { use std::fmt::Write; writeln!($dst, $($arg)*).ok(); } }
+    }
+
     let mut out = String::new();
 
-    writeln!(out, "v2025-04-09").unwrap();
-    writeln!(out, "# {}", model.name).unwrap();
-    writeln!(out, "Initial View: {}", model.active_view).unwrap();
+    w!(out, "v2025-04-09");
+    w!(out, "# {}", model.name);
+    w!(out, "Initial View: {}", model.active_view);
 
-    // Categories
+    // ── Views (first: typically small, orients the reader) ───────────
+    for (_view_name, view) in &model.views {
+        w!(out, "\n## View: {}", view.name);
+        for (cat, axis) in &view.category_axes {
+            let qcat = quote_name(cat);
+            if *axis == Axis::Page {
+                if let Some(sel) = view.page_selections.get(cat) {
+                    w!(out, "{qcat}: page, {}", quote_name(sel));
+                    continue;
+                }
+            }
+            let axis_str = match axis {
+                Axis::Row => "row",
+                Axis::Column => "column",
+                Axis::Page => "page",
+                Axis::None => "none",
+            };
+            w!(out, "{qcat}: {axis_str}");
+        }
+        if !view.number_format.is_empty() {
+            w!(out, "format: {}", view.number_format);
+        }
+        for (prefix, map) in [("hidden", &view.hidden_items), ("collapsed", &view.collapsed_groups)]
+        {
+            let mut pairs: Vec<_> = map
+                .iter()
+                .flat_map(|(cat, items)| items.iter().map(move |item| (cat.as_str(), item.as_str())))
+                .collect();
+            pairs.sort();
+            for (cat, item) in pairs {
+                w!(out, "{prefix}: {}/{}", quote_name(cat), quote_name(item));
+            }
+        }
+    }
+
+    // ── Formulas ─────────────────────────────────────────────────────
+    if !model.formulas().is_empty() {
+        w!(out, "\n## Formulas");
+        for f in model.formulas() {
+            w!(out, "- {} [{}]", f.raw, f.target_category);
+        }
+    }
+
+    // ── Categories (items comma-separated on one line) ───────────────
     for cat in model.categories.values() {
-        writeln!(out, "\n## Category: {}", cat.name).unwrap();
+        w!(out, "\n## Category: {}", cat.name);
+        let mut bare: Vec<String> = Vec::new();
+        let mut grouped: Vec<String> = Vec::new();
         for item in cat.items.values() {
             match &item.group {
-                Some(g) => writeln!(out, "- {}[{}]", quote_name(&item.name), quote_name(g))
-                    .unwrap(),
-                None => writeln!(out, "- {}", quote_name(&item.name)).unwrap(),
+                Some(g) => grouped.push(format!("{}[{}]", quote_name(&item.name), quote_name(g))),
+                None => bare.push(quote_name(&item.name)),
             }
         }
-        // Group hierarchy: lines starting with `>` for groups that have a parent
+        if !bare.is_empty() {
+            w!(out, "- {}", bare.join(", "));
+        }
+        for g_item in &grouped {
+            w!(out, "- {g_item}");
+        }
         for g in &cat.groups {
             if let Some(parent) = &g.parent {
-                writeln!(out, "> {}[{}]", quote_name(&g.name), quote_name(parent)).unwrap();
+                w!(out, "> {}[{}]", quote_name(&g.name), quote_name(parent));
             }
         }
     }
 
-    // Formulas
-    if !model.formulas().is_empty() {
-        writeln!(out, "\n## Formulas").unwrap();
-        for f in model.formulas() {
-            writeln!(out, "- {} [{}]", f.raw, f.target_category).unwrap();
-        }
-    }
-
-    // Data — sorted by coordinate string for deterministic diffs
+    // ── Data (last: typically the largest section) ────────────────────
     let mut cells: Vec<_> = model.data.iter_cells().collect();
     cells.sort_by_key(|(k, _)| coord_str(k));
     if !cells.is_empty() {
-        writeln!(out, "\n## Data").unwrap();
+        w!(out, "\n## Data");
         for (key, value) in cells {
             let val_str = match value {
                 CellValue::Number(n) => format_number(*n),
-                // Always pipe-quote text values to distinguish from numbers
-                CellValue::Text(s) | CellValue::Error(s) => {
-                    let mut out = String::with_capacity(s.len() + 2);
-                    out.push('|');
-                    for c in s.chars() {
-                        match c {
-                            '|' => out.push_str("\\|"),
-                            '\\' => out.push_str("\\\\"),
-                            '\n' => out.push_str("\\n"),
-                            c => out.push(c),
-                        }
-                    }
-                    out.push('|');
-                    out
-                }
+                CellValue::Text(s) | CellValue::Error(s) => pipe_quote(s),
             };
-            writeln!(out, "{} = {}", coord_str(&key), val_str).unwrap();
-        }
-    }
-
-    // Views
-    for (_view_name, view) in &model.views {
-        writeln!(out, "\n## View: {}", view.name).unwrap();
-        for (cat, axis) in &view.category_axes {
-            let qcat = quote_name(cat);
-            match axis {
-                Axis::Row => writeln!(out, "{qcat}: row").unwrap(),
-                Axis::Column => writeln!(out, "{qcat}: column").unwrap(),
-                Axis::Page => match view.page_selections.get(cat) {
-                    Some(sel) => writeln!(out, "{qcat}: page, {}", quote_name(sel)).unwrap(),
-                    None => writeln!(out, "{qcat}: page").unwrap(),
-                },
-                Axis::None => writeln!(out, "{qcat}: none").unwrap(),
-            }
-        }
-        if !view.number_format.is_empty() {
-            writeln!(out, "format: {}", view.number_format).unwrap();
-        }
-        // Hidden items (sorted for deterministic diffs)
-        let mut hidden: Vec<(&str, &str)> = view
-            .hidden_items
-            .iter()
-            .flat_map(|(cat, items)| items.iter().map(move |item| (cat.as_str(), item.as_str())))
-            .collect();
-        hidden.sort();
-        for (cat, item) in hidden {
-            writeln!(out, "hidden: {}/{}", quote_name(cat), quote_name(item)).unwrap();
-        }
-        // Collapsed groups (sorted for deterministic diffs)
-        let mut collapsed: Vec<(&str, &str)> = view
-            .collapsed_groups
-            .iter()
-            .flat_map(|(cat, gs)| gs.iter().map(move |g| (cat.as_str(), g.as_str())))
-            .collect();
-        collapsed.sort();
-        for (cat, group) in collapsed {
-            writeln!(out, "collapsed: {}/{}", quote_name(cat), quote_name(group)).unwrap();
+            w!(out, "{} = {}", coord_str(&key), val_str);
         }
     }
 
@@ -206,20 +251,26 @@ pub fn format_md(model: &Model) -> String {
 }
 
 
-/// Parse the markdown `.improv` format into a Model.
+/// Parse the `.improv` format into a Model using the pest grammar.
 ///
-/// Uses a two-pass approach so the file is order-independent:
-/// pass 1 collects raw data, pass 2 builds the model with categories
-/// registered before views are configured.
+/// Sections may appear in any order; a two-pass approach registers categories
+/// before configuring views.
 pub fn parse_md(text: &str) -> Result<Model> {
-    // ── Intermediate types ────────────────────────────────────────────────────
+    use anyhow::bail;
+    use pest::iterators::{Pair, Pairs};
+
+    let file = ImprovParser::parse(Rule::file, text)
+        .map_err(|e| anyhow::anyhow!("Parse error: {e}"))?
+        .next()
+        .ok_or_else(|| anyhow::anyhow!("Empty parse result"))?;
+
+    // ── Intermediate collectors ──────────────────────────────────────────────
 
     struct PCategory {
         name: String,
-        items: Vec<(String, Option<String>)>, // (name, group)
-        group_parents: Vec<(String, String)>, // (group, parent)
+        items: Vec<(String, Option<String>)>,
+        group_parents: Vec<(String, String)>,
     }
-
     struct PView {
         name: String,
         axes: Vec<(String, Axis)>,
@@ -229,152 +280,190 @@ pub fn parse_md(text: &str) -> Result<Model> {
         collapsed: Vec<(String, String)>,
     }
 
-    // ── Pass 1: collect ───────────────────────────────────────────────────────
-
-    #[derive(PartialEq)]
-    enum Section {
-        None,
-        Category,
-        Formulas,
-        Data,
-        View,
-    }
-
     let mut model_name: Option<String> = None;
     let mut initial_view: Option<String> = None;
     let mut categories: Vec<PCategory> = Vec::new();
-    let mut formulas: Vec<(String, String)> = Vec::new(); // (raw, category)
+    let mut formulas: Vec<(String, String)> = Vec::new();
     let mut data: Vec<(CellKey, CellValue)> = Vec::new();
     let mut views: Vec<PView> = Vec::new();
-    let mut section = Section::None;
 
-    for line in text.lines() {
-        let trimmed = line.trim();
-        if trimmed.is_empty() {
-            continue;
-        }
-        // Skip version line
-        if trimmed.starts_with('v') && trimmed.len() <= 20 && trimmed.contains('-') {
-            continue;
-        }
+    // ── Helpers for walking the pest parse tree ──────────────────────────────
 
-        if let Some(rest) = trimmed.strip_prefix("Initial View: ") {
-            initial_view = Some(rest.trim().to_string());
-            continue;
-        }
+    /// Pop the next child pair, or fail with an error that names `ctx` when none is left.
+    fn next<'a>(pairs: &mut Pairs<'a, Rule>, ctx: &str) -> Result<Pair<'a, Rule>> {
+        pairs
+            .next()
+            .ok_or_else(|| anyhow::anyhow!("Expected child in {ctx}"))
+    }
 
-        if trimmed.starts_with("# ") && !trimmed.starts_with("## ") {
-            model_name = Some(trimmed[2..].trim().to_string());
-            continue;
-        }
-        if let Some(rest) = trimmed.strip_prefix("## Category: ") {
-            categories.push(PCategory {
-                name: rest.trim().to_string(),
-                items: Vec::new(),
-                group_parents: Vec::new(),
-            });
-            section = Section::Category;
-            continue;
-        }
-        if trimmed == "## Formulas" {
-            section = Section::Formulas;
-            continue;
-        }
-        if trimmed == "## Data" {
-            section = Section::Data;
-            continue;
-        }
-        if let Some(rest) = trimmed.strip_prefix("## View: ") {
-            let name = rest.trim().to_string();
-            views.push(PView {
-                name,
-                axes: Vec::new(),
-                page_selections: Vec::new(),
-                format: String::new(),
-                hidden: Vec::new(),
-                collapsed: Vec::new(),
-            });
-            section = Section::View;
-            continue;
-        }
-        if trimmed.starts_with("## ") {
-            continue;
-        }
+    /// Return the text of `pair`'s first child, trimmed; errors if `pair` has no children.
+    fn first_str(pair: Pair<'_, Rule>) -> Result<String> {
+        Ok(next(&mut pair.into_inner(), "first_str")?
+            .as_str()
+            .trim()
+            .to_string())
+    }
 
-        match section {
-            Section::Category => {
-                let Some(cat) = categories.last_mut() else {
-                    continue;
-                };
-                if let Some(rest) = trimmed.strip_prefix("- ") {
-                    let (name, group) = parse_bracketed(rest);
-                    cat.items.push((name, group));
-                } else if let Some(rest) = trimmed.strip_prefix("> ") {
-                    let (group, parent) = parse_bracketed(rest);
-                    if let Some(p) = parent {
-                        cat.group_parents.push((group, p));
-                    }
-                }
+    fn extract_name(pair: Pair<'_, Rule>) -> Result<String> {
+        match pair.as_rule() {
+            Rule::bare_name => Ok(pair.as_str().to_string()),
+            Rule::pipe_quoted => {
+                let inner = next(&mut pair.into_inner(), "pipe_quoted")?;
+                Ok(unescape_pipe(inner.as_str()))
             }
-            Section::Formulas => {
-                if let Some(rest) = trimmed.strip_prefix("- ") {
-                    let (raw, cat) = parse_bracketed(rest);
-                    if let Some(c) = cat {
-                        formulas.push((raw.to_string(), c.to_string()));
-                    }
-                }
-            }
-            Section::Data => {
-                let Some((coords, value)) = parse_data_line(trimmed) else {
-                    continue;
-                };
-                data.push((CellKey::new(coords), value));
-            }
-            Section::View => {
-                let Some(view) = views.last_mut() else {
-                    continue;
-                };
-                if let Some(fmt) = trimmed.strip_prefix("format: ") {
-                    view.format = fmt.trim().to_string();
-                } else if let Some(rest) = trimmed.strip_prefix("hidden: ") {
-                    if let Some((c, i)) = parse_slash_path(rest.trim()) {
-                        view.hidden.push((c, i));
-                    }
-                } else if let Some(rest) = trimmed.strip_prefix("collapsed: ") {
-                    if let Some((c, g)) = parse_slash_path(rest.trim()) {
-                        view.collapsed.push((c, g));
-                    }
-                } else if let Some((cat, rest)) = parse_name_colon(trimmed) {
-                    if let Some(sel_rest) = rest.strip_prefix("page") {
-                        view.axes.push((cat.clone(), Axis::Page));
-                        if let Some(sel) = sel_rest.strip_prefix(", ") {
-                            let sel = parse_inline_name(sel.trim());
-                            view.page_selections.push((cat, sel));
-                        }
-                    } else {
-                        let axis = match rest {
-                            "row" => Axis::Row,
-                            "column" => Axis::Column,
-                            "none" => Axis::None,
-                            _ => continue,
-                        };
-                        view.axes.push((cat, axis));
-                    }
-                }
-            }
-            Section::None => {}
+            _ => Ok(pair.as_str().to_string()),
         }
     }
 
-    // ── Pass 2: build ─────────────────────────────────────────────────────────
+    /// Extract the first two children of `pair` as names; errors if fewer than two exist.
+    fn extract_name_pair(pair: Pair<'_, Rule>) -> Result<(String, String)> {
+        let ctx = format!("{:?}", pair.as_rule()); // rule name for errors, taken before `pair` is consumed
+        let mut parts = pair.into_inner();
+        let a = extract_name(next(&mut parts, &ctx)?)?;
+        let b = extract_name(next(&mut parts, &ctx)?)?;
+        Ok((a, b))
+    }
+
+    // ── Pass 1: walk the parse tree ─────────────────────────────────────────
+
+    for pair in file.into_inner() {
+        match pair.as_rule() {
+            Rule::version_line | Rule::EOI => {}
+            Rule::model_name => {
+                model_name = Some(first_str(pair)?);
+            }
+            Rule::initial_view => {
+                initial_view = Some(first_str(pair)?);
+            }
+            Rule::category_section => {
+                let mut inner = pair.into_inner();
+                let cname = next(&mut inner, "category_section")?.as_str().trim().to_string();
+                let mut pc = PCategory {
+                    name: cname,
+                    items: Vec::new(),
+                    group_parents: Vec::new(),
+                };
+                for entry in inner {
+                    match entry.as_rule() {
+                        Rule::item_list => {
+                            for name_pair in entry.into_inner() {
+                                pc.items.push((extract_name(name_pair)?, None));
+                            }
+                        }
+                        Rule::grouped_item => {
+                            let (name, group) = extract_name_pair(entry)?;
+                            pc.items.push((name, Some(group)));
+                        }
+                        Rule::group_hierarchy => {
+                            pc.group_parents.push(extract_name_pair(entry)?);
+                        }
+                        _ => {}
+                    }
+                }
+                categories.push(pc);
+            }
+            Rule::formulas_section => {
+                for fl in pair.into_inner() {
+                    if fl.as_rule() == Rule::formula_line {
+                        let raw = first_str(fl)?;
+                        if let Some(i) = raw.rfind(" [") {
+                            if raw.ends_with(']') {
+                                formulas.push((
+                                    raw[..i].to_string(),
+                                    raw[i + 2..raw.len() - 1].to_string(),
+                                ));
+                            }
+                        }
+                    }
+                }
+            }
+            Rule::data_section => {
+                for dl in pair.into_inner() {
+                    if dl.as_rule() == Rule::data_line {
+                        let mut dl_inner = dl.into_inner();
+                        let coord_list = next(&mut dl_inner, "data_line coords")?;
+                        let value_pair = next(&mut dl_inner, "data_line value")?;
+
+                        let coords: Vec<_> = coord_list
+                            .into_inner()
+                            .filter(|p| p.as_rule() == Rule::coord)
+                            .map(extract_name_pair)
+                            .collect::<Result<_>>()?;
+
+                        let value = match value_pair.as_rule() {
+                            Rule::number => {
+                                CellValue::Number(value_pair.as_str().parse().unwrap_or(0.0))
+                            }
+                            Rule::pipe_quoted => {
+                                let inner = next(&mut value_pair.into_inner(), "pipe_quoted")?;
+                                CellValue::Text(unescape_pipe(inner.as_str()))
+                            }
+                            Rule::bare_value => match value_pair.as_str().trim() {
+                                "inf" => CellValue::Number(f64::INFINITY),
+                                "-inf" => CellValue::Number(f64::NEG_INFINITY),
+                                "nan" => CellValue::Number(f64::NAN),
+                                s => CellValue::Text(s.to_string()),
+                            },
+                            _ => CellValue::Text(value_pair.as_str().to_string()),
+                        };
+
+                        data.push((CellKey::new(coords), value));
+                    }
+                }
+            }
+            Rule::view_section => {
+                let mut inner = pair.into_inner();
+                let vname = next(&mut inner, "view_section")?.as_str().trim().to_string();
+                let mut pv = PView {
+                    name: vname,
+                    axes: Vec::new(),
+                    page_selections: Vec::new(),
+                    format: String::new(),
+                    hidden: Vec::new(),
+                    collapsed: Vec::new(),
+                };
+                for entry in inner {
+                    match entry.as_rule() {
+                        Rule::axis_line => {
+                            let mut parts = entry.into_inner();
+                            let cat = extract_name(next(&mut parts, "axis cat")?)?;
+                            let kind_str = next(&mut parts, "axis kind")?.as_str();
+                            let axis = match kind_str {
+                                "row" => Axis::Row,
+                                "column" => Axis::Column,
+                                "page" => Axis::Page,
+                                "none" => Axis::None,
+                                _ => bail!("Unknown axis kind: {kind_str}"),
+                            };
+                            pv.axes.push((cat.clone(), axis));
+                            if axis == Axis::Page {
+                                if let Some(sel_pair) = parts.next() {
+                                    pv.page_selections.push((cat, extract_name(sel_pair)?));
+                                }
+                            }
+                        }
+                        Rule::format_line => pv.format = first_str(entry)?,
+                        Rule::hidden_line => pv.hidden.push(extract_name_pair(entry)?),
+                        Rule::collapsed_line => pv.collapsed.push(extract_name_pair(entry)?),
+                        _ => {}
+                    }
+                }
+                views.push(pv);
+            }
+            _ => {}
+        }
+    }
+
+    // ── Pass 2: build the Model ─────────────────────────────────────────────
 
     let name = model_name.ok_or_else(|| anyhow::anyhow!("Missing model title (# Name)"))?;
     let mut m = Model::new(&name);
 
-    // Categories first — registers them with all existing views via on_category_added
     for pc in &categories {
         m.add_category(&pc.name)?;
-        let cat = m.category_mut(&pc.name).unwrap();
+        let cat = m
+            .category_mut(&pc.name)
+            .ok_or_else(|| anyhow::anyhow!("Category '{}' not found after add", pc.name))?;
         for (item_name, group) in &pc.items {
             match group {
                 Some(g) => {
@@ -396,12 +485,14 @@ pub fn parse_md(text: &str) -> Result<Model> {
         }
     }
 
-    // Views — all categories are now registered, so set_axis works correctly
     for pv in &views {
         if !m.views.contains_key(&pv.name) {
             m.create_view(&pv.name);
         }
-        let view = m.views.get_mut(&pv.name).unwrap();
+        let view = m
+            .views
+            .get_mut(&pv.name)
+            .ok_or_else(|| anyhow::anyhow!("View '{}' not found after create", pv.name))?;
         for (cat, axis) in &pv.axes {
             view.set_axis(cat, *axis);
         }
@@ -419,14 +510,12 @@ pub fn parse_md(text: &str) -> Result<Model> {
         }
     }
 
-    // Set initial view if specified
     if let Some(iv) = &initial_view {
         if m.views.contains_key(iv) {
             m.active_view = iv.clone();
         }
     }
 
-    // Formulas and data can go in any order relative to each other
     for (raw, cat_name) in &formulas {
         m.add_formula(parse_formula(raw, cat_name).with_context(|| format!("Formula: {raw}"))?);
     }
@@ -437,74 +526,6 @@ pub fn parse_md(text: &str) -> Result<Model> {
     Ok(m)
 }
 
-/// Parse `"Name[Group]"` or `"|Name|[|Group|]"` or `"Name"`.
-/// Returns (name_str, optional_group_str). Both may be pipe-quoted.
-fn parse_bracketed(s: &str) -> (String, Option<String>) {
-    let s = s.trim();
-    // Parse the name part (possibly pipe-quoted)
-    let (name, rest) = if s.starts_with('|') {
-        match parse_maybe_quoted_name(s) {
-            Some((n, r)) => (n, r),
-            None => return (s.to_string(), None),
-        }
-    } else {
-        // Bare name: everything before `[` (if any) or end
-        match s.find('[') {
-            Some(i) => (s[..i].to_string(), &s[i..]),
-            None => return (s.to_string(), None),
-        }
-    };
-
-    // Check for [group] suffix
-    let rest = rest.trim();
-    if rest.starts_with('[') && rest.ends_with(']') {
-        let inner = &rest[1..rest.len() - 1];
-        let group = parse_inline_name(inner);
-        (name, Some(group))
-    } else {
-        (name, None)
-    }
-}
-
-/// Parse a `name/name` path where names may be pipe-quoted.
-fn parse_slash_path(s: &str) -> Option<(String, String)> {
-    if s.starts_with('|') {
-        // First name is pipe-quoted — find closing pipe, then expect /
-        let (name, rest) = parse_maybe_quoted_name(s)?;
-        let rest = rest.strip_prefix('/')?;
-        let item = parse_inline_name(rest);
-        Some((name, item))
-    } else {
-        let (head, tail) = s.split_once('/')?;
-        Some((head.trim().to_string(), parse_inline_name(tail.trim())))
-    }
-}
-
-/// Parse a `name: rest` line where name may be pipe-quoted.
-fn parse_name_colon(s: &str) -> Option<(String, &str)> {
-    if s.starts_with('|') {
-        let (name, rest) = parse_maybe_quoted_name(s)?;
-        let rest = rest.strip_prefix(": ")?;
-        Some((name, rest))
-    } else {
-        let colon = s.find(": ")?;
-        let name = s[..colon].trim().to_string();
-        let rest = s[colon + 2..].trim();
-        Some((name, rest))
-    }
-}
-
-/// Parse a single name that may be pipe-quoted. Returns the unquoted string.
-fn parse_inline_name(s: &str) -> String {
-    let s = s.trim();
-    if s.starts_with('|') {
-        if let Some((name, _)) = parse_maybe_quoted_name(s) {
-            return name;
-        }
-    }
-    s.to_string()
-}
-
 fn coord_str(key: &CellKey) -> String {
     key.0
         .iter()
@@ -513,110 +534,6 @@ fn coord_str(key: &CellKey) -> String {
         .join(", ")
 }
 
-/// Parse a data line like `Cat=Item, Cat2=Item2 = "value"` into coordinates
-/// and a cell value. Handles backtick-quoted names containing `=` or `, `.
-fn parse_data_line(line: &str) -> Option<(Vec<(String, String)>, CellValue)> {
-    // Find the value separator: the last ` = ` that isn't inside a backtick-quoted name.
-    // Strategy: scan for ` = ` from the right, since the value is always at the end.
-    // But the value itself could contain ` = ` if it's a quoted text.
-    // The format is: coords ` = ` value
-    // where value is either a number or "quoted text".
-    //
-    // We find the separator by scanning from left: the first ` = ` that is NOT
-    // inside a backtick-quoted name is the separator. Since coordinates don't
-    // contain ` = ` (they use bare `=`), the first ` = ` is always the separator.
-    let sep = line.find(" = ")?;
-    let coord_part = &line[..sep];
-    let value_part = line[sep + 3..].trim();
-
-    let coords = parse_coord_str(coord_part)?;
-    if coords.is_empty() {
-        return None;
-    }
-
-    let value = if let Ok(n) = value_part.parse::<f64>() {
-        CellValue::Number(n)
-    } else {
-        // Text value — may be pipe-quoted or bare
-        CellValue::Text(parse_inline_name(value_part))
-    };
-
-    Some((coords, value))
-}
-
-/// Parse a coordinate string like `Cat=Item, Cat2=Item2` into pairs.
-/// Handles backtick-quoted names: `` `Income, Gross`=A ``.
-fn parse_coord_str(s: &str) -> Option<Vec<(String, String)>> {
-    let mut pairs = Vec::new();
-    let mut rest = s.trim();
-
-    while !rest.is_empty() {
-        // Parse category name (possibly backtick-quoted)
-        let (cat, after_cat) = parse_maybe_quoted_name(rest)?;
-        let after_cat = after_cat.strip_prefix('=')?;
-        // Parse item name (possibly backtick-quoted)
-        let (item, after_item) = parse_maybe_quoted_name(after_cat)?;
-
-        pairs.push((cat, item));
-
-        let after_item = after_item.trim_start();
-        if after_item.is_empty() {
-            break;
-        }
-        // Expect ", " separator
-        rest = after_item.strip_prefix(", ")?;
-    }
-
-    Some(pairs)
-}
-
-/// Parse a name that may be pipe-quoted. Returns (name, rest_of_string).
-/// Pipe-quoted: `|Income, Gross|` → `"Income, Gross"`.
-/// Backslash escapes inside: `|\||` → `"|"`, `|\\|` → `"\"`, `|\n|` → newline.
-/// Unquoted: stops at `=` or `, ` or end of string.
-fn parse_maybe_quoted_name(s: &str) -> Option<(String, &str)> {
-    if let Some(inner) = s.strip_prefix('|') {
-        // Pipe-quoted name: scan for unescaped closing pipe
-        let mut name = String::new();
-        let mut chars = inner.char_indices();
-        while let Some((i, c)) = chars.next() {
-            if c == '\\' {
-                // Escape sequence
-                if let Some((_, next)) = chars.next() {
-                    match next {
-                        '|' => name.push('|'),
-                        '\\' => name.push('\\'),
-                        'n' => name.push('\n'),
-                        other => {
-                            name.push('\\');
-                            name.push(other);
-                        }
-                    }
-                }
-            } else if c == '|' {
-                // End of quoted name
-                return Some((name, &inner[i + 1..]));
-            } else {
-                name.push(c);
-            }
-        }
-        // Unterminated pipe — treat whole thing as name
-        Some((name, ""))
-    } else {
-        // Unquoted: take chars until `=` or `, ` or end (whichever comes first)
-        let eq_pos = s.find('=');
-        let comma_pos = s.find(", ");
-        let end = match (eq_pos, comma_pos) {
-            (Some(a), Some(b)) => a.min(b),
-            (Some(a), None) => a,
-            (None, Some(b)) => b,
-            (None, None) => s.len(),
-        };
-        let name = s[..end].trim().to_string();
-        Some((name, &s[end..]))
-    }
-}
-
 pub fn export_csv(model: &Model, view_name: &str, path: &Path) -> Result<()> {
     let view = model
         .views
@@ -711,10 +628,10 @@ mod tests {
         let m = two_cat_model();
         let text = format_md(&m);
         assert!(text.contains("## Category: Type"));
-        assert!(text.contains("- Food"));
-        assert!(text.contains("- Gas"));
+        // Bare items are now comma-separated on one line
+        assert!(text.contains("- Food, Gas"), "expected comma-separated items:\n{text}");
         assert!(text.contains("## Category: Month"));
-        assert!(text.contains("- Jan"));
+        assert!(text.contains("Jan"));
     }
 
     #[test]
@@ -739,7 +656,7 @@ mod tests {
             .unwrap()
             .add_group(Group::new("Q1").with_parent("2025"));
         let text = format_md(&m);
-        assert!(text.contains("> Q1[2025]"), "got:\n{text}");
+        assert!(text.contains("> Q1[|2025|]"), "got:\n{text}");
     }
 
     #[test]
@@ -930,7 +847,7 @@ mod tests {
     fn parse_md_order_independent_view_before_categories() {
         // A hand-edited file with the view section before the category sections.
         // The parser must still produce correct axis assignments.
-        let text = "# Test\n\
+        let text = "v2025-04-09\n# Test\n\
                     ## View: Default\n\
                     Type: row\n\
                     Month: column\n\
@@ -945,8 +862,7 @@ mod tests {
 
     #[test]
     fn parse_md_order_independent_new_view_before_categories() {
-        // A non-Default view with swapped axes, declared before categories exist.
-        let text = "# Test\n\
+        let text = "v2025-04-09\n# Test\n\
                     ## View: Transposed\n\
                     Type: column\n\
                     Month: row\n\
@@ -968,7 +884,7 @@ mod tests {
 
     #[test]
     fn parse_md_order_independent_data_before_categories() {
-        let text = "# Test\n\
+        let text = "v2025-04-09\n# Test\n\
                     ## Data\n\
                     Month=Jan, Type=Food = 42\n\
                     ## Category: Type\n\
@@ -1094,11 +1010,11 @@ mod tests {
 
     #[test]
     fn parse_md_ignores_blank_and_comment_lines() {
-        let text = r#"# Test Model
+        let text = r#"v2025-04-09
+# Test Model
 
 ## Category: Type
-- Food
-- Gas
+- Food, Gas
 
 ## Data
 Type=Food = 42
@@ -1945,42 +1861,36 @@ mod parser_edge_cases {
 
     #[test]
     fn parse_just_model_name() {
-        let m = parse_md("# MyModel\n").unwrap();
+        let m = parse_md("v2025-04-09\n# MyModel\n").unwrap();
         assert_eq!(m.name, "MyModel");
     }
 
     #[test]
     fn parse_data_without_value() {
-        // Malformed data line: no " = " separator
-        let text = "# Test\n## Data\nType=Food\n";
-        let m = parse_md(text).unwrap();
-        // Should silently skip the malformed line
-        assert_eq!(m.data.iter_cells().count(), 0);
+        // Malformed data line: no " = " separator — pest rejects it
+        let text = "v2025-04-09\n# Test\n## Data\nType=Food\n";
+        assert!(parse_md(text).is_err());
     }
 
     #[test]
     fn parse_data_with_empty_coords() {
-        // Data line with only value, no coordinates
-        let text = "# Test\n## Data\n = 42\n";
-        let m = parse_md(text).unwrap();
-        assert_eq!(m.data.iter_cells().count(), 0);
+        // Data line with only value, no coordinates — pest rejects it
+        let text = "v2025-04-09\n# Test\n## Data\n = 42\n";
+        assert!(parse_md(text).is_err());
     }
 
     #[test]
     fn parse_duplicate_categories() {
-        // Two categories with the same name
-        let text = "# Test\n## Category: Type\n- A\n## Category: Type\n- B\n";
+        let text = "v2025-04-09\n# Test\n## Category: Type\n- A\n## Category: Type\n- B\n";
         let m = parse_md(text).unwrap();
         let cat = m.category("Type").unwrap();
-        // Second declaration should win or merge
         let item_names: Vec<&str> = cat.items.values().map(|i| i.name.as_str()).collect();
-        // At minimum shouldn't panic
         assert!(!item_names.is_empty());
     }
 
     #[test]
     fn parse_category_with_no_items() {
-        let text = "# Test\n## Category: Empty\n## Category: Full\n- A\n";
+        let text = "v2025-04-09\n# Test\n## Category: Empty\n## Category: Full\n- A\n";
         let m = parse_md(text).unwrap();
         assert!(m.category("Empty").is_some());
         assert_eq!(m.category("Empty").unwrap().items.len(), 0);
@@ -2077,25 +1987,26 @@ mod parser_edge_cases {
 
     #[test]
     fn model_name_with_leading_trailing_spaces() {
-        let text = "#   Spaced Model   \n";
+        let text = "v2025-04-09\n#   Spaced Model   \n";
         let m = parse_md(text).unwrap();
+        // rest_of_line captures everything after "# "; we trim in the builder
         assert_eq!(m.name, "Spaced Model");
     }
 
     #[test]
     fn category_name_with_trailing_spaces() {
-        let text = "# Test\n## Category: Trailing   \n- Item\n";
+        let text = "v2025-04-09\n# Test\n## Category: Trailing   \n- Item\n";
         let m = parse_md(text).unwrap();
+        // rest_of_line includes trailing spaces; we trim in the builder
         assert!(m.category("Trailing").is_some());
     }
 
     #[test]
     fn data_line_with_extra_whitespace() {
-        let text = "# Test\n## Category: T\n- A\n## Category: M\n- J\n## Data\n  T=A ,  M=J  =  42  \n";
-        let m = parse_md(text).unwrap();
-        // Should handle extra whitespace gracefully
-        let count = m.data.iter_cells().count();
-        assert!(count <= 1, "At most one cell should parse: got {count}");
+        // With the pest grammar, extra whitespace in data lines is rejected
+        let text = "v2025-04-09\n# Test\n## Category: T\n- A\n## Category: M\n- J\n## Data\n  T=A ,  M=J  =  42  \n";
+        // pest grammar is strict about whitespace — this should fail
+        assert!(parse_md(text).is_err());
     }
 
     // ── Three-category model ────────────────────────────────────────────
@@ -2173,3 +2084,208 @@ mod parser_edge_cases {
         );
     }
 }
+
+// ── Grammar-walking file generator ───────────────────────────────────────────
+//
+// Parses `improv.pest` at test time and walks the AST to generate random valid
+// files.  The generator and parser share a single source of truth: the grammar.
+
+#[cfg(test)]
+mod gen {
+    use pest_meta::ast::{Expr, RuleType};
+    use pest_meta::parser;
+    use proptest::prelude::*;
+    use std::collections::HashMap;
+
+    /// Parse the embedded `improv.pest` grammar and index its rules by name.
+    ///
+    /// Panics if parsing the grammar or consuming its rules fails.
+    fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
+        let source = include_str!("improv.pest");
+        let pairs = parser::parse(parser::Rule::grammar_rules, source)
+            .unwrap_or_else(|e| panic!("Bad grammar: {e}"));
+        let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
+        // `r` is owned, so its fields can be moved out without cloning the name.
+        rules.into_iter().map(|r| (r.name, (r.ty, r.expr))).collect()
+    }
+
+    /// Recursive string generator driven by a pest `Expr`.
+    ///
+    /// `choices` is read left to right, one byte per decision (choice, optional,
+    /// repetition count, character pick).  Once exhausted every byte reads as 0:
+    /// first branch, optional skipped, fewest repetitions, lowest character.
+    struct Gen<'g> {
+        rules: &'g HashMap<String, (RuleType, Expr)>, // grammar rules keyed by name
+        choices: Vec<u8>, // entropy bytes
+        pos: usize, // index of the next byte to read from `choices`
+    }
+
+    impl<'g> Gen<'g> {
+        /// Create a generator over `rules` that draws its decisions from `choices`.
+        ///
+        /// Reading starts at the first entropy byte.
+        fn new(rules: &'g HashMap<String, (RuleType, Expr)>, choices: Vec<u8>) -> Self {
+            let pos = 0;
+            Self { rules, choices, pos }
+        }
+
+        /// Return the next entropy byte and advance; yields 0 once exhausted.
+        fn pick(&mut self) -> u8 {
+            let cursor = self.pos;
+            self.pos += 1;
+            self.choices.get(cursor).copied().unwrap_or(0)
+        }
+
+        /// Append to `out` text generated from `expr`, taking one byte from
+        /// `pick()` per random decision.  Lookaheads, unsupported expression
+        /// kinds and unknown rule names contribute no output.
+        fn emit(&mut self, expr: &Expr, out: &mut String) {
+            match expr {
+                Expr::Str(s) => out.push_str(s),
+                Expr::Range(lo, hi) => {
+                    let lo = lo.chars().next().unwrap() as u32;
+                    let hi = hi.chars().next().unwrap() as u32;
+                    let range = hi - lo + 1;
+                    let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
+                    out.push(ch);
+                }
+                Expr::Ident(name) => {
+                    // Built-in pest rules
+                    match name.as_str() {
+                        "ANY" => {
+                            let ch = (b'a' + self.pick() % 26) as char;
+                            out.push(ch);
+                        }
+                        "NEWLINE" => out.push('\n'),
+                        "SOI" | "EOI" => {}
+                        "ASCII_DIGIT" => {
+                            let d = (b'0' + self.pick() % 10) as char;
+                            out.push(d);
+                        }
+                        _ => {
+                            // Look up user-defined rule
+                            if let Some((_ty, expr)) = self.rules.get(name) {
+                                self.emit(expr, out);
+                            }
+                        }
+                    }
+                }
+                Expr::Seq(a, b) => {
+                    self.emit(a, out);
+                    self.emit(b, out);
+                }
+                Expr::Choice(..) => {
+                    // Flatten both sides so one pick is uniform over every alternative.
+                    let mut alts: Vec<&Expr> = Vec::new();
+                    let mut pending = vec![expr];
+                    while let Some(node) = pending.pop() {
+                        if let Expr::Choice(l, r) = node {
+                            pending.push(r.as_ref());
+                            pending.push(l.as_ref());
+                        } else {
+                            alts.push(node);
+                        }
+                    }
+                    let idx = self.pick() as usize % alts.len();
+                    self.emit(alts[idx], out);
+                }
+                Expr::Opt(inner) => {
+                    if self.pick() % 3 != 0 {
+                        // ~66% chance of emitting
+                        self.emit(inner, out);
+                    }
+                }
+                Expr::Rep(inner) => {
+                    let count = self.pick() % 4; // 0..=3 repetitions
+                    for _ in 0..count {
+                        self.emit(inner, out);
+                    }
+                }
+                Expr::RepOnce(inner) => {
+                    let count = 1 + self.pick() % 3; // 1..=3 repetitions
+                    for _ in 0..count {
+                        self.emit(inner, out);
+                    }
+                }
+                // Lookaheads and unsupported expressions produce no output.
+                _ => {}
+            }
+        }
+
+        /// Expand `rule_name`; an unknown rule yields an empty string.
+        fn generate(&mut self, rule_name: &str) -> String {
+            let mut out = String::new();
+            let rule = self.rules.get(rule_name); // `&'g` borrow, so no clone is needed
+            rule.into_iter().for_each(|(_ty, expr)| self.emit(expr, &mut out));
+            out
+        }
+    }
+
+    /// Proptest strategy: generate a valid `.improv` file by walking the grammar.
+    pub fn improv_file() -> impl Strategy<Value = String> {
+        // Parse the grammar once per strategy, not once per generated case.
+        let rules = load_grammar();
+        prop::collection::vec(any::<u8>(), 64..=256).prop_map(move |choices| {
+            let mut walker = Gen::new(&rules, choices);
+            walker.generate("file")
+        })
+    }
+}
+
+#[cfg(test)]
+mod grammar_prop_tests {
+    use super::{format_md, gen, parse_md};
+    use proptest::prelude::*;
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(500))]
+
+        /// parse(generate()) — every generated file parses without error.
+        #[test]
+        fn generated_file_parses(file in gen::improv_file()) {
+            let result = parse_md(&file);
+            prop_assert!(result.is_ok(),
+                "Generated file failed to parse:\n{}\nError: {}",
+                file, result.unwrap_err());
+        }
+
+        /// parse(print(parse(generate()))) — round-trip through format is stable.
+        #[test]
+        fn generated_file_roundtrips(file in gen::improv_file()) {
+            let result1 = parse_md(&file);
+            // Skip inputs that don't parse: `generated_file_parses` already reports
+            // those, so this test only checks files the parser accepts.
+            prop_assume!(result1.is_ok());
+            let model1 = result1.unwrap();
+            let printed = format_md(&model1);
+            let model2_result = parse_md(&printed);
+            prop_assert!(model2_result.is_ok(),
+                "Re-formatted file failed to parse:\n{}\nError: {}",
+                printed, model2_result.unwrap_err());
+
+            let model2 = model2_result.unwrap();
+
+            // Model name preserved
+            prop_assert_eq!(&model1.name, &model2.name);
+
+            // Category count preserved
+            prop_assert_eq!(
+                model1.categories.len(),
+                model2.categories.len(),
+                "Category count changed"
+            );
+
+            // Cell count preserved
+            let count1 = model1.data.iter_cells().count();
+            let count2 = model2.data.iter_cells().count();
+            prop_assert_eq!(count1, count2,
+                "Cell count changed: {} → {}\nOriginal:\n{}\nRe-formatted:\n{}",
+                count1, count2, file, printed);
+
+            // Double round-trip: format(parse(format(parse(gen)))) == format(parse(gen))
+            let printed2 = format_md(&model2);
+            prop_assert_eq!(&printed, &printed2,
+                "format→parse→format not idempotent");
+        }
+    }
+}