Files
improvise/examples/gen-grammar.rs
Edward Langley ed1ee7e23a feat(examples): add grammar generation and pretty-printing utilities
Add new examples for generating sample .improv data based on the Pest
grammar and pretty-printing existing .improv files.

Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf)
2026-04-13 21:30:19 -07:00

316 lines
10 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Generate a random valid example matching a rule from `improv.pest`.
//!
//! Usage:
//! cargo run --example gen-grammar -- <rule_name>
//!
//! Examples:
//! cargo run --example gen-grammar -- file
//! cargo run --example gen-grammar -- category_section
//! cargo run --example gen-grammar -- bare_name
//!
//! Each invocation generates one example seeded from the current time + PID.
//!
//! The generator adds constraints beyond what the grammar requires to produce
//! realistic, round-trippable output:
//! - bare names are drawn from a word pool instead of random letters
//! - pipe_inner is never empty
//! - rest_of_line always produces at least one character
//! - repetitions (`*`) produce 1-4 items, not 0
use pest_meta::ast::{Expr, RuleType};
use pest_meta::parser;
use std::collections::HashMap;
/// Text of the Pest grammar file `../src/persistence/improv.pest`, embedded
/// into the binary at compile time via `include_str!`.
const GRAMMAR: &str = include_str!("../src/persistence/improv.pest");
/// Parses the embedded grammar text and indexes its rules by name.
///
/// Returns a map from rule name to that rule's `(type, expression)` pair.
///
/// # Panics
///
/// Panics if the grammar text fails to parse or if the parsed pairs cannot be
/// converted into rules.
fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
    let pairs = match parser::parse(parser::Rule::grammar_rules, GRAMMAR) {
        Ok(pairs) => pairs,
        Err(e) => panic!("Bad grammar: {e}"),
    };
    let parsed = match parser::consume_rules(pairs) {
        Ok(parsed) => parsed,
        Err(e) => panic!("{e:?}"),
    };

    let mut table = HashMap::new();
    for rule in parsed {
        // A later rule with the same name replaces an earlier one, exactly as
        // collecting the pairs into a map would.
        table.insert(rule.name, (rule.ty, rule.expr));
    }
    table
}
// ── Word pools for realistic output ─────────────────────────────────────────
/// Single-token words used by the `bare_name` and `rest_of_line` overrides in
/// `Gen::try_override`.
const BARE_WORDS: &[&str] = &[
"Region", "Product", "Customer", "Channel", "Date",
"North", "South", "East", "West",
"Revenue", "Cost", "Profit", "Margin",
"Widgets", "Gadgets", "Sprockets",
"Q1", "Q2", "Q3", "Q4",
"Jan", "Feb", "Mar", "Apr",
"Acme", "Globex", "Initech", "Umbrella",
];
/// Phrases (containing spaces or hyphens) used by the `pipe_inner` override in
/// `Gen::try_override`, which therefore never produces empty content.
const QUOTED_WORDS: &[&str] = &[
"Total Revenue", "Net Income", "Gross Margin",
"2025-01", "2025-02", "2025-03",
"East Coast", "West Coast",
"Acme Corp", "Globex Inc",
"Cost of Goods", "Operating Expense",
];
/// Titles emitted after the `# ` prefix by the `model_name` override.
const MODEL_NAMES: &[&str] = &[
"Sales Report", "Budget 2025", "Quarterly Review",
"Inventory Model", "Revenue Analysis", "Demo Model",
];
/// Names emitted after the `## View: ` prefix by the `view_section` override.
const VIEW_NAMES: &[&str] = &[
"Default", "Summary", "Detail", "By Region", "Monthly",
];
/// Formula bodies emitted after the `- ` prefix by the `formula_line` override.
const FORMULA_EXPRS: &[&str] = &[
"Profit = Revenue - Cost",
"Margin = Profit / Revenue",
"Tax = Revenue * 0.1",
"Total = SUM(Revenue)",
"Net = Revenue - Cost - Tax",
];
/// Format specifiers emitted after the `format: ` prefix by the `format_line`
/// override.
const FORMAT_STRINGS: &[&str] = &[
",.0", ",.2f", ",.1f", ".0%",
];
// ── PRNG ────────────────────────────────────────────────────────────────────
/// Small deterministic pseudo-random generator built from three xor-shift
/// steps (left 13, right 7, left 17) over a single 64-bit state word.
struct Xs64(u64);

impl Xs64 {
    /// Creates a generator from `seed`.
    ///
    /// A zero seed is replaced by 1: an all-zero state would stay zero under
    /// the xor-shift steps and yield nothing but zeros.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 { 1 } else { seed };
        Self(state)
    }

    /// Advances the state and returns the new 64-bit value.
    fn next(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Advances the state and returns the low 8 bits of the new value.
    fn byte(&mut self) -> u8 {
        // Little-endian byte 0 is the least significant byte.
        self.next().to_le_bytes()[0]
    }

    /// Advances the state and returns one entry of `pool`.
    ///
    /// # Panics
    ///
    /// Panics if `pool` is empty.
    fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str {
        let index = self.next() as usize % pool.len();
        pool[index]
    }
}
// ── Generator ───────────────────────────────────────────────────────────────
/// Random example generator that walks the parsed grammar.
///
/// Borrows the rule table for `'g` and owns the PRNG that drives every random
/// decision, so a given rule table and seed always produce the same output.
struct Gen<'g> {
    /// Grammar rules keyed by rule name.
    rules: &'g HashMap<String, (RuleType, Expr)>,
    /// Source of all randomness used during generation.
    rng: Xs64,
}

impl<'g> Gen<'g> {
    /// Creates a generator over `rules` whose PRNG is seeded with `seed`.
    fn new(rules: &'g HashMap<String, (RuleType, Expr)>, seed: u64) -> Self {
        Self {
            rules,
            rng: Xs64::new(seed),
        }
    }

    /// Draws one pseudo-random byte.
    fn pick(&mut self) -> u8 {
        self.rng.byte()
    }

    /// Appends `prefix`, one random entry of `pool`, and a newline to `out`.
    fn push_line_from(&mut self, prefix: &str, pool: &[&str], out: &mut String) {
        out.push_str(prefix);
        out.push_str(self.rng.pick_from(pool));
        out.push('\n');
    }

    /// Try a rule-specific override. Returns true if handled.
    ///
    /// Overrides replace the plain grammar walk for rules where random
    /// characters would look unrealistic: `bare_name`, `pipe_inner`,
    /// `rest_of_line`, `model_name`, `format_line`, `formula_line`, `number`,
    /// `axis_kind` and `view_section`. For any other `rule_name` nothing is
    /// written to `out`, no randomness is consumed, and `false` is returned.
    fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool {
        match rule_name {
            "bare_name" => {
                out.push_str(self.rng.pick_from(BARE_WORDS));
                true
            }
            "pipe_inner" => {
                // Always drawn from a non-empty pool, so never empty.
                out.push_str(self.rng.pick_from(QUOTED_WORDS));
                true
            }
            "rest_of_line" => {
                // One to three space-separated words, so never empty.
                let word_count = 1 + self.pick() % 3;
                for i in 0..word_count {
                    if i > 0 {
                        out.push(' ');
                    }
                    out.push_str(self.rng.pick_from(BARE_WORDS));
                }
                true
            }
            "model_name" => {
                self.push_line_from("# ", MODEL_NAMES, out);
                true
            }
            "format_line" => {
                self.push_line_from("format: ", FORMAT_STRINGS, out);
                true
            }
            "formula_line" => {
                self.push_line_from("- ", FORMULA_EXPRS, out);
                true
            }
            "number" => {
                // Whole part in 1..=99999; about one time in three a
                // two-digit fractional part is appended.
                let whole = 1 + self.rng.next() % 99999;
                if self.pick() % 3 == 0 {
                    let frac = self.rng.next() % 100;
                    out.push_str(&format!("{whole}.{frac:02}"));
                } else {
                    out.push_str(&whole.to_string());
                }
                true
            }
            "axis_kind" => {
                let kinds = ["row", "column", "page", "none"];
                out.push_str(kinds[usize::from(self.pick()) % kinds.len()]);
                true
            }
            "view_section" => {
                self.push_line_from("## View: ", VIEW_NAMES, out);
                // The count is drawn before the lookup so the random sequence
                // does not depend on whether `view_entry` exists.
                let count = 1 + self.pick() % 4;
                // Copy the shared reference out of `self` so the rule body can
                // be walked by `&mut self` without cloning the expression.
                let rules = self.rules;
                if let Some((_ty, entry)) = rules.get("view_entry") {
                    for _ in 0..count {
                        self.emit(entry, out);
                    }
                }
                true
            }
            _ => false,
        }
    }

    /// Appends one random expansion of `expr` to `out`.
    ///
    /// Literals are copied verbatim, ranges yield one character, rule
    /// references are expanded (override first, then the rule body), sequences
    /// emit both halves, choices pick one alternative, optionals are emitted
    /// about two times out of three, and repetitions emit 1-4 copies.
    /// Predicates and every expression kind not listed here emit nothing.
    ///
    /// # Panics
    ///
    /// Panics if a range endpoint is an empty string or the chosen code point
    /// is not a valid `char`.
    fn emit(&mut self, expr: &Expr, out: &mut String) {
        match expr {
            Expr::Str(s) => out.push_str(s),
            Expr::Range(lo, hi) => {
                let first = lo.chars().next().unwrap() as u32;
                let last = hi.chars().next().unwrap() as u32;
                let span = last - first + 1;
                // The offset comes from a single byte, so at most the first
                // 256 code points of a wider range are reachable.
                let offset = u32::from(self.pick()) % span;
                out.push(char::from_u32(first + offset).unwrap());
            }
            Expr::Ident(name) => match name.as_str() {
                "ANY" => {
                    // Restricted to a lowercase ASCII letter for readability.
                    let ch = (b'a' + self.pick() % 26) as char;
                    out.push(ch);
                }
                "NEWLINE" => out.push('\n'),
                "SOI" | "EOI" => {}
                "ASCII_DIGIT" => {
                    let digit = (b'0' + self.pick() % 10) as char;
                    out.push(digit);
                }
                _ => {
                    // Try override first, fall back to grammar walk. Names
                    // that are neither overridden nor in the rule table emit
                    // nothing.
                    if !self.try_override(name, out) {
                        let rules = self.rules;
                        if let Some((_ty, body)) = rules.get(name) {
                            self.emit(body, out);
                        }
                    }
                }
            },
            Expr::Seq(left, right) => {
                self.emit(left, out);
                self.emit(right, out);
            }
            Expr::Choice(first, rest) => {
                // Flatten a right-nested chain of choices so each alternative
                // in the chain gets the same chance of being selected.
                let mut alts: Vec<&Expr> = vec![first.as_ref()];
                let mut tail: &Expr = rest.as_ref();
                while let Expr::Choice(head, next) = tail {
                    alts.push(head.as_ref());
                    tail = next.as_ref();
                }
                alts.push(tail);
                let chosen = alts[usize::from(self.pick()) % alts.len()];
                self.emit(chosen, out);
            }
            Expr::Opt(inner) => {
                // ~66% chance of emitting
                if self.pick() % 3 != 0 {
                    self.emit(inner, out);
                }
            }
            Expr::Rep(inner) | Expr::RepOnce(inner) => {
                // 1-4 reps (never 0, even for `*`: avoids degenerate empty
                // output).
                let count = 1 + self.pick() % 4;
                for _ in 0..count {
                    self.emit(inner, out);
                }
            }
            // Predicates consume no input, so they contribute no text.
            Expr::NegPred(_) | Expr::PosPred(_) => {}
            // NOTE(review): any remaining expression kind is silently skipped;
            // confirm the grammar uses none whose text is required.
            _ => {}
        }
    }

    /// Generates one example for `rule_name`.
    ///
    /// An override, if one exists for the rule, takes precedence over the
    /// grammar definition. Returns `None` when the rule has no override and is
    /// not present in the rule table.
    fn generate(&mut self, rule_name: &str) -> Option<String> {
        // Check override first (for top-level rule invocation)
        let mut out = String::new();
        if self.try_override(rule_name, &mut out) {
            return Some(out);
        }
        let rules = self.rules;
        let (_ty, body) = rules.get(rule_name)?;
        self.emit(body, &mut out);
        Some(out)
    }
}
/// Prints every rule name in `rules` to stdout, one per line, in sorted order.
fn print_rules(rules: &HashMap<String, (RuleType, Expr)>) {
    let mut sorted: Vec<&String> = rules.keys().collect();
    sorted.sort();
    sorted.into_iter().for_each(|name| println!(" {name}"));
}
/// Command-line entry point: generates one example for the rule named by the
/// first argument and prints it to stdout.
///
/// Exits with status 2 when no rule name is given, and with status 1 when the
/// rule is unknown or generation fails. In the first two cases the available
/// rule names are printed as well.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let rules = load_grammar();

    let rule = match args.get(1) {
        Some(rule) => rule,
        None => {
            // The program name is not guaranteed to be present in argv, so
            // fall back to the example's name instead of indexing `args[0]`.
            let program = args.first().map_or("gen-grammar", String::as_str);
            eprintln!("Usage: {program} <rule_name>");
            eprintln!();
            eprintln!("Available rules:");
            print_rules(&rules);
            std::process::exit(2);
        }
    };

    if !rules.contains_key(rule) {
        eprintln!("Unknown rule '{rule}'. Available rules:");
        print_rules(&rules);
        std::process::exit(1);
    }

    // Seed from the wall clock (0 if it reads before the epoch) mixed with the
    // process id. The nanosecond count is deliberately truncated to 64 bits;
    // only the low bits matter for seeding.
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let pid_mix = u64::from(std::process::id()).wrapping_mul(0x9E3779B97F4A7C15);
    let seed = nanos ^ pid_mix;

    let mut g = Gen::new(&rules, seed);
    match g.generate(rule) {
        Some(out) => print!("{out}"),
        None => {
            eprintln!("Failed to generate from rule '{rule}'");
            std::process::exit(1);
        }
    }
}