feat(examples): add grammar generation and pretty-printing utilities

Add new examples for generating sample .improv data based on the Pest
grammar and pretty-printing existing .improv files.

Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf)
This commit is contained in:
Edward Langley
2026-04-13 21:30:19 -07:00
parent c48a5cd575
commit ed1ee7e23a
2 changed files with 340 additions and 0 deletions

315
examples/gen-grammar.rs Normal file
View File

@ -0,0 +1,315 @@
//! Generate a random valid example matching a rule from `improv.pest`.
//!
//! Usage:
//! cargo run --example gen-grammar -- <rule_name>
//!
//! Examples:
//! cargo run --example gen-grammar -- file
//! cargo run --example gen-grammar -- category_section
//! cargo run --example gen-grammar -- bare_name
//!
//! Each invocation generates one example seeded from the current time + PID.
//!
//! The generator adds constraints beyond what the grammar requires to produce
//! realistic, round-trippable output:
//! - bare names are drawn from a word pool instead of random letters
//! - pipe_inner is never empty
//! - rest_of_line always produces at least one character
//! - repetitions (`*`) produce 1–4 items, not 0
use pest_meta::ast::{Expr, RuleType};
use pest_meta::parser;
use std::collections::HashMap;
const GRAMMAR: &str = include_str!("../src/persistence/improv.pest");
/// Parses the embedded `improv.pest` grammar into a lookup table keyed by rule name.
///
/// Each entry maps a rule's name to its rule type and expression tree.
///
/// # Panics
///
/// Panics if the grammar text fails to parse or if its rules cannot be
/// converted into the AST form; the grammar is compiled into the binary, so
/// either failure is a build-time mistake rather than a runtime condition.
fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
    let pairs = parser::parse(parser::Rule::grammar_rules, GRAMMAR)
        .unwrap_or_else(|e| panic!("Bad grammar: {e}"));
    let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
    rules
        .into_iter()
        // Each rule is owned here, so its fields are moved rather than cloned.
        .map(|rule| (rule.name, (rule.ty, rule.expr)))
        .collect()
}
// ── Word pools for realistic output ─────────────────────────────────────────
/// Single-token words; drawn on by the `bare_name` and `rest_of_line` overrides.
const BARE_WORDS: &[&str] = &[
"Region", "Product", "Customer", "Channel", "Date",
"North", "South", "East", "West",
"Revenue", "Cost", "Profit", "Margin",
"Widgets", "Gadgets", "Sprockets",
"Q1", "Q2", "Q3", "Q4",
"Jan", "Feb", "Mar", "Apr",
"Acme", "Globex", "Initech", "Umbrella",
];
/// Multi-word or punctuated names; drawn on by the `pipe_inner` override.
const QUOTED_WORDS: &[&str] = &[
"Total Revenue", "Net Income", "Gross Margin",
"2025-01", "2025-02", "2025-03",
"East Coast", "West Coast",
"Acme Corp", "Globex Inc",
"Cost of Goods", "Operating Expense",
];
/// Titles emitted after `# ` by the `model_name` override.
const MODEL_NAMES: &[&str] = &[
"Sales Report", "Budget 2025", "Quarterly Review",
"Inventory Model", "Revenue Analysis", "Demo Model",
];
/// Names emitted after `## View: ` by the `view_section` override.
const VIEW_NAMES: &[&str] = &[
"Default", "Summary", "Detail", "By Region", "Monthly",
];
/// Formula bodies emitted after `- ` by the `formula_line` override.
const FORMULA_EXPRS: &[&str] = &[
"Profit = Revenue - Cost",
"Margin = Profit / Revenue",
"Tax = Revenue * 0.1",
"Total = SUM(Revenue)",
"Net = Revenue - Cost - Tax",
];
/// Format specifiers emitted after `format: ` by the `format_line` override.
const FORMAT_STRINGS: &[&str] = &[
",.0", ",.2f", ",.1f", ".0%",
];
// ── PRNG ────────────────────────────────────────────────────────────────────
/// Small xorshift-style 64-bit pseudo-random generator (shifts 13 / 7 / 17).
///
/// Deterministic for a given seed; not suitable for anything security-related.
struct Xs64(u64);

impl Xs64 {
    /// Creates a generator from `seed`.
    ///
    /// A zero seed is replaced by 1: with these shift/xor steps a zero state
    /// would stay zero forever.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 { 1 } else { seed };
        Xs64(state)
    }

    /// Advances the state by one step and returns the new state.
    fn next(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Returns the low eight bits of the next state.
    fn byte(&mut self) -> u8 {
        let word = self.next();
        (word & 0xff) as u8
    }

    /// Returns one entry of `pool`, indexed by the next state modulo the pool length.
    ///
    /// # Panics
    ///
    /// Panics if `pool` is empty.
    fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str {
        let idx = self.next() as usize % pool.len();
        pool[idx]
    }
}
// ── Generator ───────────────────────────────────────────────────────────────
/// Random example generator: walks grammar expressions and appends matching text.
struct Gen<'g> {
/// Grammar rules keyed by rule name; each value is the rule's type and expression tree.
rules: &'g HashMap<String, (RuleType, Expr)>,
/// Pseudo-random source behind every choice the generator makes.
rng: Xs64,
}
impl<'g> Gen<'g> {
/// Builds a generator over `rules` whose random choices are driven by `seed`.
fn new(rules: &'g HashMap<String, (RuleType, Expr)>, seed: u64) -> Self {
    let rng = Xs64::new(seed);
    Self { rules, rng }
}
/// Draws one pseudo-random byte from the generator's PRNG.
fn pick(&mut self) -> u8 {
    let rng = &mut self.rng;
    rng.byte()
}
/// Tries a rule-specific override that produces realistic text instead of a
/// raw grammar walk.
///
/// Returns `true` if `rule_name` has an override, in which case its text has
/// been appended to `out`. Returns `false` (leaving `out` untouched) if the
/// caller should expand the rule from the grammar instead.
fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool {
    match rule_name {
        "bare_name" => out.push_str(self.rng.pick_from(BARE_WORDS)),
        // Never empty.
        "pipe_inner" => out.push_str(self.rng.pick_from(QUOTED_WORDS)),
        "rest_of_line" => {
            // Context-sensitive in the grammar: always produce 1–3 words so
            // the line is non-empty.
            let word_count = 1 + self.pick() % 3;
            for i in 0..word_count {
                if i > 0 {
                    out.push(' ');
                }
                out.push_str(self.rng.pick_from(BARE_WORDS));
            }
        }
        "model_name" => {
            out.push_str("# ");
            out.push_str(self.rng.pick_from(MODEL_NAMES));
            out.push('\n');
        }
        "format_line" => {
            out.push_str("format: ");
            out.push_str(self.rng.pick_from(FORMAT_STRINGS));
            out.push('\n');
        }
        "formula_line" => {
            out.push_str("- ");
            out.push_str(self.rng.pick_from(FORMULA_EXPRS));
            out.push('\n');
        }
        "number" => {
            // Whole part in 1..=99999; roughly one time in three a
            // two-digit fractional part is appended.
            let whole = 1 + self.rng.next() % 99999;
            let text = if self.pick() % 3 == 0 {
                let frac = self.rng.next() % 100;
                format!("{whole}.{frac:02}")
            } else {
                whole.to_string()
            };
            out.push_str(&text);
        }
        "axis_kind" => {
            out.push_str(self.rng.pick_from(&["row", "column", "page", "none"]));
        }
        "view_section" => {
            out.push_str("## View: ");
            out.push_str(self.rng.pick_from(VIEW_NAMES));
            out.push('\n');
            // Follow the header with 1–4 `view_entry` expansions taken from
            // the grammar.
            let count = 1 + self.pick() % 4;
            // Copy the `&'g` map reference out of `self` so the rule's
            // expression can be borrowed while `self` is mutated, instead
            // of deep-cloning the expression tree.
            let rules = self.rules;
            if let Some((_, entry)) = rules.get("view_entry") {
                for _ in 0..count {
                    self.emit(entry, out);
                }
            }
        }
        _ => return false,
    }
    true
}
/// Appends to `out` one random expansion of the grammar expression `expr`.
///
/// Rule references go through [`Self::try_override`] first and fall back to
/// the rule's own expression. Unbounded repetitions produce 1–4 items;
/// bounded repetitions stay inside their bounds. Lookahead predicates and
/// any expression kinds not listed below contribute no text.
///
/// # Panics
///
/// Panics if a character range has an empty bound, an upper bound below its
/// lower bound (debug builds), or selects a value that is not a valid `char`.
fn emit(&mut self, expr: &Expr, out: &mut String) {
    match expr {
        // Literals are emitted as written; a case-insensitive literal uses
        // the spelling from the grammar, which is always an accepted form.
        Expr::Str(s) | Expr::Insens(s) => out.push_str(s),
        Expr::Range(lo, hi) => {
            let lo = lo.chars().next().unwrap() as u32;
            let hi = hi.chars().next().unwrap() as u32;
            let range = hi - lo + 1;
            // The offset comes from a single random byte, so at most the
            // first 256 code points of a wider range are ever produced.
            let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
            out.push(ch);
        }
        Expr::Ident(name) => match name.as_str() {
            // `ANY` is narrowed to a lowercase ASCII letter.
            "ANY" => out.push((b'a' + self.pick() % 26) as char),
            "NEWLINE" => out.push('\n'),
            "SOI" | "EOI" => {}
            "ASCII_DIGIT" => out.push((b'0' + self.pick() % 10) as char),
            _ => {
                // Try the override first, then fall back to walking the
                // rule's expression. Names that are neither overridden nor
                // present in the grammar emit nothing.
                if !self.try_override(name, out) {
                    // Copy the `&'g` map reference out of `self` so the
                    // expression can be borrowed without cloning the tree.
                    let rules = self.rules;
                    if let Some((_, rule_expr)) = rules.get(name) {
                        self.emit(rule_expr, out);
                    }
                }
            }
        },
        Expr::Seq(a, b) => {
            self.emit(a, out);
            self.emit(b, out);
        }
        Expr::Choice(a, b) => {
            // Flatten the right-nested chain `a | (b | (c | …))` so every
            // alternative is picked with equal probability.
            let mut alts: Vec<&Expr> = vec![a.as_ref()];
            let mut cur = b.as_ref();
            while let Expr::Choice(l, r) = cur {
                alts.push(l.as_ref());
                cur = r.as_ref();
            }
            alts.push(cur);
            let idx = self.pick() as usize % alts.len();
            self.emit(alts[idx], out);
        }
        Expr::Opt(inner) => {
            // ~66% chance of emitting.
            if self.pick() % 3 != 0 {
                self.emit(inner, out);
            }
        }
        // 1–4 reps for both `*` and `+` (never 0 — avoid degenerate empty output).
        Expr::Rep(inner) | Expr::RepOnce(inner) => {
            let count = 1 + self.pick() % 4;
            for _ in 0..count {
                self.emit(inner, out);
            }
        }
        Expr::RepExact(inner, n) => {
            for _ in 0..*n {
                self.emit(inner, out);
            }
        }
        Expr::RepMin(inner, min) => {
            // The minimum plus up to three extra items.
            let count = min.saturating_add(u32::from(self.pick() % 4));
            for _ in 0..count {
                self.emit(inner, out);
            }
        }
        Expr::RepMax(inner, max) => {
            // 1–4 items, capped at the declared maximum.
            let count = (1 + u32::from(self.pick() % 4)).min(*max);
            for _ in 0..count {
                self.emit(inner, out);
            }
        }
        Expr::RepMinMax(inner, min, max) => {
            // Anywhere from `min` to `min + 3`, never above `max`.
            let span = max.saturating_sub(*min).min(3) + 1;
            let count = *min + u32::from(self.pick()) % span;
            for _ in 0..count {
                self.emit(inner, out);
            }
        }
        // PUSH(e) matches `e` itself; the stack side effect has no text.
        Expr::Push(inner) => self.emit(inner, out),
        // Lookahead consumes no input. Negative predicates are not enforced,
        // so generated text may occasionally violate one.
        Expr::NegPred(_) | Expr::PosPred(_) => {}
        // Remaining expression kinds are not modelled and emit nothing.
        _ => {}
    }
}
/// Generates one example for the rule named `rule_name`.
///
/// A rule-specific override is preferred, so invoking an overridden rule at
/// the top level behaves the same as reaching it through another rule.
/// Returns `None` if the rule has no override and is not in the grammar.
fn generate(&mut self, rule_name: &str) -> Option<String> {
    let mut out = String::new();
    if self.try_override(rule_name, &mut out) {
        return Some(out);
    }
    // Copy the `&'g` map reference out of `self` so the expression can be
    // borrowed during the walk instead of cloning the whole tree.
    let rules = self.rules;
    let (_, expr) = rules.get(rule_name)?;
    self.emit(expr, &mut out);
    Some(out)
}
}
/// Prints every rule name to stdout, one per line, in sorted order.
fn print_rules(rules: &HashMap<String, (RuleType, Expr)>) {
    let mut sorted = rules.keys().collect::<Vec<_>>();
    sorted.sort();
    sorted.into_iter().for_each(|name| println!(" {name}"));
}
/// Command-line entry point: generates one example for the rule named by the
/// first argument and prints it to stdout.
///
/// Exits with status 2 when no rule name is given, and with status 1 when the
/// rule is unknown or generation fails. In the first two cases the available
/// rules are listed.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let rules = load_grammar();

    let rule = match args.get(1) {
        Some(rule) => rule,
        None => {
            eprintln!("Usage: {} <rule_name>", args[0]);
            eprintln!();
            eprintln!("Available rules:");
            print_rules(&rules);
            std::process::exit(2)
        }
    };

    if rules.get(rule).is_none() {
        eprintln!("Unknown rule '{rule}'. Available rules:");
        print_rules(&rules);
        std::process::exit(1);
    }

    // Seed = wall-clock nanoseconds (0 if the clock reads before the epoch)
    // mixed with the process id spread by a multiplicative constant.
    let clock_nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |d| d.as_nanos() as u64);
    let pid_mix = (std::process::id() as u64).wrapping_mul(0x9E3779B97F4A7C15);
    let seed = clock_nanos ^ pid_mix;

    let mut g = Gen::new(&rules, seed);
    if let Some(out) = g.generate(rule) {
        print!("{out}");
    } else {
        eprintln!("Failed to generate from rule '{rule}'");
        std::process::exit(1);
    }
}