//! Generate a random valid example matching a rule from `improv.pest`. //! //! Usage: //! cargo run --example gen-grammar -- //! //! Examples: //! cargo run --example gen-grammar -- file //! cargo run --example gen-grammar -- category_section //! cargo run --example gen-grammar -- bare_name //! //! Each invocation generates one example seeded from the current time + PID. //! //! The generator adds constraints beyond what the grammar requires to produce //! realistic, round-trippable output: //! - bare names are drawn from a word pool instead of random letters //! - pipe_inner is never empty //! - rest_of_line always produces at least one character //! - repetitions (`*`) produce 1–4 items, not 0 use pest_meta::ast::{Expr, RuleType}; use pest_meta::parser; use std::collections::HashMap; const GRAMMAR: &str = include_str!("../src/persistence/improv.pest"); fn load_grammar() -> HashMap { let pairs = parser::parse(parser::Rule::grammar_rules, GRAMMAR) .unwrap_or_else(|e| panic!("Bad grammar: {e}")); let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}")); rules .into_iter() .map(|r| (r.name.clone(), (r.ty, r.expr))) .collect() } // ── Word pools for realistic output ───────────────────────────────────────── const BARE_WORDS: &[&str] = &[ "Region", "Product", "Customer", "Channel", "Date", "North", "South", "East", "West", "Revenue", "Cost", "Profit", "Margin", "Widgets", "Gadgets", "Sprockets", "Q1", "Q2", "Q3", "Q4", "Jan", "Feb", "Mar", "Apr", "Acme", "Globex", "Initech", "Umbrella", ]; const QUOTED_WORDS: &[&str] = &[ "Total Revenue", "Net Income", "Gross Margin", "2025-01", "2025-02", "2025-03", "East Coast", "West Coast", "Acme Corp", "Globex Inc", "Cost of Goods", "Operating Expense", ]; const MODEL_NAMES: &[&str] = &[ "Sales Report", "Budget 2025", "Quarterly Review", "Inventory Model", "Revenue Analysis", "Demo Model", ]; const VIEW_NAMES: &[&str] = &["Default", "Summary", "Detail", "By Region", "Monthly"]; const FORMULA_EXPRS: &[&str] = &[ "Profit = Revenue - Cost", "Margin = Profit / Revenue", "Tax = Revenue * 0.1", "Total = SUM(Revenue)", "Net = Revenue - Cost - Tax", ]; const FORMAT_STRINGS: &[&str] = &[",.0", ",.2f", ",.1f", ".0%"]; // ── PRNG ──────────────────────────────────────────────────────────────────── struct Xs64(u64); impl Xs64 { fn new(seed: u64) -> Self { Self(seed.max(1)) } fn next(&mut self) -> u64 { self.0 ^= self.0 << 13; self.0 ^= self.0 >> 7; self.0 ^= self.0 << 17; self.0 } fn byte(&mut self) -> u8 { (self.next() & 0xff) as u8 } fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str { pool[self.next() as usize % pool.len()] } } // ── Generator ─────────────────────────────────────────────────────────────── struct Gen<'g> { rules: &'g HashMap, rng: Xs64, } impl<'g> Gen<'g> { fn new(rules: &'g HashMap, seed: u64) -> Self { Self { rules, rng: Xs64::new(seed), } } fn pick(&mut self) -> u8 { self.rng.byte() } /// Try a rule-specific override. Returns true if handled. fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool { match rule_name { "bare_name" => { out.push_str(self.rng.pick_from(BARE_WORDS)); true } "pipe_inner" => { // Never empty out.push_str(self.rng.pick_from(QUOTED_WORDS)); true } "rest_of_line" => { // Context-sensitive: produce something non-empty let word_count = 1 + self.pick() % 3; for i in 0..word_count { if i > 0 { out.push(' '); } out.push_str(self.rng.pick_from(BARE_WORDS)); } true } "model_name" => { out.push_str("# "); out.push_str(self.rng.pick_from(MODEL_NAMES)); out.push('\n'); true } "format_line" => { out.push_str("format: "); out.push_str(self.rng.pick_from(FORMAT_STRINGS)); out.push('\n'); true } "formula_line" => { out.push_str("- "); out.push_str(self.rng.pick_from(FORMULA_EXPRS)); out.push('\n'); true } "number" => { let whole = 1 + self.rng.next() % 99999; if self.pick() % 3 == 0 { let frac = self.rng.next() % 100; out.push_str(&format!("{whole}.{frac:02}")); } else { out.push_str(&format!("{whole}")); } true } "axis_kind" => { let kinds = ["row", "column", "page", "none"]; out.push_str(kinds[self.pick() as usize % kinds.len()]); true } "view_section" => { out.push_str("## View: "); out.push_str(self.rng.pick_from(VIEW_NAMES)); out.push('\n'); // Generate view_entry* from the grammar let count = 1 + self.pick() % 4; if let Some((_ty, expr)) = self.rules.get("view_entry") { let expr = expr.clone(); for _ in 0..count { self.emit(&expr, out); } } true } _ => false, } } fn emit(&mut self, expr: &Expr, out: &mut String) { match expr { Expr::Str(s) => out.push_str(s), Expr::Range(lo, hi) => { let lo = lo.chars().next().unwrap() as u32; let hi = hi.chars().next().unwrap() as u32; let range = hi - lo + 1; let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap(); out.push(ch); } Expr::Ident(name) => match name.as_str() { "ANY" => { let ch = (b'a' + self.pick() % 26) as char; out.push(ch); } "NEWLINE" => out.push('\n'), "SOI" | "EOI" => {} "ASCII_DIGIT" => { let d = (b'0' + self.pick() % 10) as char; out.push(d); } _ => { // Try override first, fall back to grammar walk if !self.try_override(name, out) { if let Some((_ty, expr)) = self.rules.get(name) { let expr = expr.clone(); self.emit(&expr, out); } } } }, Expr::Seq(a, b) => { self.emit(a, out); self.emit(b, out); } Expr::Choice(a, b) => { let mut alts: Vec<&Expr> = vec![a.as_ref()]; let mut cur = b.as_ref(); while let Expr::Choice(l, r) = cur { alts.push(l.as_ref()); cur = r.as_ref(); } alts.push(cur); let idx = self.pick() as usize % alts.len(); self.emit(alts[idx], out); } Expr::Opt(inner) => { if self.pick() % 3 != 0 { // ~66% chance of emitting self.emit(inner, out); } } Expr::Rep(inner) => { // 1–4 reps (never 0 — avoid degenerate empty output) let count = 1 + self.pick() % 4; for _ in 0..count { self.emit(inner, out); } } Expr::RepOnce(inner) => { let count = 1 + self.pick() % 4; for _ in 0..count { self.emit(inner, out); } } Expr::NegPred(_) | Expr::PosPred(_) => {} _ => {} } } fn generate(&mut self, rule_name: &str) -> Option { // Check override first (for top-level rule invocation) let mut out = String::new(); if self.try_override(rule_name, &mut out) { return Some(out); } let (_ty, expr) = self.rules.get(rule_name)?.clone(); self.emit(&expr, &mut out); Some(out) } } fn print_rules(rules: &HashMap) { let mut names: Vec<_> = rules.keys().collect(); names.sort(); for name in names { println!(" {name}"); } } fn main() { let args: Vec = std::env::args().collect(); let rules = load_grammar(); if args.len() < 2 { eprintln!("Usage: {} ", args[0]); eprintln!(); eprintln!("Available rules:"); print_rules(&rules); std::process::exit(2); } let rule = &args[1]; if !rules.contains_key(rule) { eprintln!("Unknown rule '{rule}'. Available rules:"); print_rules(&rules); std::process::exit(1); } let seed = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_nanos() as u64) .unwrap_or(0) ^ (std::process::id() as u64).wrapping_mul(0x9E3779B97F4A7C15); let mut g = Gen::new(&rules, seed); match g.generate(rule) { Some(out) => print!("{out}"), None => { eprintln!("Failed to generate from rule '{rule}'"); std::process::exit(1); } } }