diff --git a/examples/gen-grammar.rs b/examples/gen-grammar.rs
new file mode 100644
index 0000000..321fffc
--- /dev/null
+++ b/examples/gen-grammar.rs
@@ -0,0 +1,349 @@
+//! Generate a random valid example matching a rule from `improv.pest`.
+//!
+//! Usage:
+//!   cargo run --example gen-grammar -- <rule_name>
+//!
+//! Examples:
+//!   cargo run --example gen-grammar -- file
+//!   cargo run --example gen-grammar -- category_section
+//!   cargo run --example gen-grammar -- bare_name
+//!
+//! Each invocation generates one example seeded from the current time + PID.
+//!
+//! The generator adds constraints beyond what the grammar requires to produce
+//! realistic, round-trippable output:
+//! - bare names are drawn from a word pool instead of random letters
+//! - pipe_inner is never empty
+//! - rest_of_line always produces at least one character
+//! - repetitions (`*`) produce 1–4 items, not 0
+
+use pest_meta::ast::{Expr, RuleType};
+use pest_meta::parser;
+use std::collections::HashMap;
+
+const GRAMMAR: &str = include_str!("../src/persistence/improv.pest");
+
+/// Parse the embedded grammar into a map from rule name to `(type, expression)`.
+fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
+    let pairs = parser::parse(parser::Rule::grammar_rules, GRAMMAR)
+        .unwrap_or_else(|e| panic!("Bad grammar: {e}"));
+    let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
+    rules
+        .into_iter()
+        .map(|r| (r.name.clone(), (r.ty, r.expr)))
+        .collect()
+}
+
+// ── Word pools for realistic output ─────────────────────────────────────────
+
+const BARE_WORDS: &[&str] = &[
+    "Region", "Product", "Customer", "Channel", "Date",
+    "North", "South", "East", "West",
+    "Revenue", "Cost", "Profit", "Margin",
+    "Widgets", "Gadgets", "Sprockets",
+    "Q1", "Q2", "Q3", "Q4",
+    "Jan", "Feb", "Mar", "Apr",
+    "Acme", "Globex", "Initech", "Umbrella",
+];
+
+const QUOTED_WORDS: &[&str] = &[
+    "Total Revenue", "Net Income", "Gross Margin",
+    "2025-01", "2025-02", "2025-03",
+    "East Coast", "West Coast",
+    "Acme Corp", "Globex Inc",
+    "Cost of Goods", "Operating Expense",
+];
+
+const MODEL_NAMES: &[&str] = &[
+    "Sales Report", "Budget 2025", "Quarterly Review",
+    "Inventory Model", "Revenue Analysis", "Demo Model",
+];
+
+const VIEW_NAMES: &[&str] = &[
+    "Default", "Summary", "Detail", "By Region", "Monthly",
+];
+
+const FORMULA_EXPRS: &[&str] = &[
+    "Profit = Revenue - Cost",
+    "Margin = Profit / Revenue",
+    "Tax = Revenue * 0.1",
+    "Total = SUM(Revenue)",
+    "Net = Revenue - Cost - Tax",
+];
+
+const FORMAT_STRINGS: &[&str] = &[
+    ",.0", ",.2f", ",.1f", ".0%",
+];
+
+// ── PRNG ────────────────────────────────────────────────────────────────────
+
+/// Minimal xorshift64 generator; a zero seed is replaced by 1 so the state never sticks at 0.
+struct Xs64(u64);
+
+impl Xs64 {
+    fn new(seed: u64) -> Self {
+        Self(seed.max(1))
+    }
+    fn next(&mut self) -> u64 {
+        self.0 ^= self.0 << 13;
+        self.0 ^= self.0 >> 7;
+        self.0 ^= self.0 << 17;
+        self.0
+    }
+    fn byte(&mut self) -> u8 {
+        (self.next() & 0xff) as u8
+    }
+    fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str {
+        pool[self.next() as usize % pool.len()]
+    }
+}
+
+// ── Generator ───────────────────────────────────────────────────────────────
+
+/// Walks grammar expressions and appends randomly chosen matching text.
+struct Gen<'g> {
+    rules: &'g HashMap<String, (RuleType, Expr)>,
+    rng: Xs64,
+}
+
+impl<'g> Gen<'g> {
+    fn new(rules: &'g HashMap<String, (RuleType, Expr)>, seed: u64) -> Self {
+        Self {
+            rules,
+            rng: Xs64::new(seed),
+        }
+    }
+
+    fn pick(&mut self) -> u8 {
+        self.rng.byte()
+    }
+
+    /// Try a rule-specific override. Returns true if handled.
+    fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool {
+        match rule_name {
+            "bare_name" => {
+                out.push_str(self.rng.pick_from(BARE_WORDS));
+                true
+            }
+            "pipe_inner" => {
+                // Never empty
+                out.push_str(self.rng.pick_from(QUOTED_WORDS));
+                true
+            }
+            "rest_of_line" => {
+                // Context-sensitive: produce something non-empty
+                let word_count = 1 + self.pick() % 3;
+                for i in 0..word_count {
+                    if i > 0 {
+                        out.push(' ');
+                    }
+                    out.push_str(self.rng.pick_from(BARE_WORDS));
+                }
+                true
+            }
+            "model_name" => {
+                out.push_str("# ");
+                out.push_str(self.rng.pick_from(MODEL_NAMES));
+                out.push('\n');
+                true
+            }
+            "format_line" => {
+                out.push_str("format: ");
+                out.push_str(self.rng.pick_from(FORMAT_STRINGS));
+                out.push('\n');
+                true
+            }
+            "formula_line" => {
+                out.push_str("- ");
+                out.push_str(self.rng.pick_from(FORMULA_EXPRS));
+                out.push('\n');
+                true
+            }
+            "number" => {
+                let whole = 1 + self.rng.next() % 99999;
+                if self.pick() % 3 == 0 {
+                    let frac = self.rng.next() % 100;
+                    out.push_str(&format!("{whole}.{frac:02}"));
+                } else {
+                    out.push_str(&format!("{whole}"));
+                }
+                true
+            }
+            "axis_kind" => {
+                let kinds = ["row", "column", "page", "none"];
+                out.push_str(kinds[self.pick() as usize % kinds.len()]);
+                true
+            }
+            "view_section" => {
+                out.push_str("## View: ");
+                out.push_str(self.rng.pick_from(VIEW_NAMES));
+                out.push('\n');
+                // Generate view_entry* from the grammar
+                let count = 1 + self.pick() % 4;
+                if let Some((_ty, expr)) = self.rules.get("view_entry") {
+                    let expr = expr.clone();
+                    for _ in 0..count {
+                        self.emit(&expr, out);
+                    }
+                }
+                true
+            }
+            _ => false,
+        }
+    }
+
+    /// Append one random expansion of `expr` to `out`.
+    fn emit(&mut self, expr: &Expr, out: &mut String) {
+        match expr {
+            Expr::Str(s) => out.push_str(s),
+            // Case-insensitive literal (`^"..."`): its own spelling always matches.
+            Expr::Insens(s) => out.push_str(s),
+            Expr::Range(lo, hi) => {
+                let lo = lo.chars().next().unwrap() as u32;
+                let hi = hi.chars().next().unwrap() as u32;
+                let range = hi - lo + 1;
+                let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
+                out.push(ch);
+            }
+            Expr::Ident(name) => match name.as_str() {
+                "ANY" => {
+                    let ch = (b'a' + self.pick() % 26) as char;
+                    out.push(ch);
+                }
+                "NEWLINE" => out.push('\n'),
+                "SOI" | "EOI" => {}
+                "ASCII_DIGIT" => {
+                    let d = (b'0' + self.pick() % 10) as char;
+                    out.push(d);
+                }
+                _ => {
+                    // Try override first, fall back to grammar walk
+                    if !self.try_override(name, out) {
+                        if let Some((_ty, expr)) = self.rules.get(name) {
+                            let expr = expr.clone();
+                            self.emit(&expr, out);
+                        }
+                    }
+                }
+            },
+            Expr::Seq(a, b) => {
+                self.emit(a, out);
+                self.emit(b, out);
+            }
+            Expr::Choice(a, b) => {
+                let mut alts: Vec<&Expr> = vec![a.as_ref()];
+                let mut cur = b.as_ref();
+                while let Expr::Choice(l, r) = cur {
+                    alts.push(l.as_ref());
+                    cur = r.as_ref();
+                }
+                alts.push(cur);
+                let idx = self.pick() as usize % alts.len();
+                self.emit(alts[idx], out);
+            }
+            Expr::Opt(inner) => {
+                if self.pick() % 3 != 0 {
+                    // ~66% chance of emitting
+                    self.emit(inner, out);
+                }
+            }
+            Expr::Rep(inner) => {
+                // 1–4 reps (never 0 — avoid degenerate empty output)
+                let count = 1 + self.pick() % 4;
+                for _ in 0..count {
+                    self.emit(inner, out);
+                }
+            }
+            Expr::RepOnce(inner) => {
+                let count = 1 + self.pick() % 4;
+                for _ in 0..count {
+                    self.emit(inner, out);
+                }
+            }
+            // Bounded repetitions (`{n}`, `{n,}`, `{,n}`, `{m,n}`): stay inside the
+            // declared bounds, adding at most a few optional items to keep output small.
+            Expr::RepExact(inner, n) => self.emit_times(inner, *n, out),
+            Expr::RepMin(inner, min) => {
+                let extra = u32::from(self.pick() % 4);
+                self.emit_times(inner, min.saturating_add(extra), out);
+            }
+            Expr::RepMax(inner, max) => {
+                let count = u32::from(1 + self.pick() % 4).min(*max);
+                self.emit_times(inner, count, out);
+            }
+            Expr::RepMinMax(inner, min, max) => {
+                let extra = u32::from(self.pick() % 4).min(max.saturating_sub(*min));
+                self.emit_times(inner, min.saturating_add(extra), out);
+            }
+            // `PUSH(e)` matches exactly the text that `e` matches.
+            Expr::Push(inner) => self.emit(inner, out),
+            // Predicates consume no input, so they contribute no text.
+            Expr::NegPred(_) | Expr::PosPred(_) => {}
+            // Remaining operators (e.g. `Skip`, `PeekSlice`) are not modelled; emit nothing.
+            _ => {}
+        }
+    }
+
+    /// Emit `inner` exactly `count` times.
+    fn emit_times(&mut self, inner: &Expr, count: u32, out: &mut String) {
+        for _ in 0..count {
+            self.emit(inner, out);
+        }
+    }
+
+    /// Generate one example for `rule_name`; returns `None` if the rule is not in the grammar.
+    fn generate(&mut self, rule_name: &str) -> Option<String> {
+        // Check override first (for top-level rule invocation)
+        let mut out = String::new();
+        if self.try_override(rule_name, &mut out) {
+            return Some(out);
+        }
+        let (_ty, expr) = self.rules.get(rule_name)?.clone();
+        self.emit(&expr, &mut out);
+        Some(out)
+    }
+}
+
+/// Print every rule name in sorted order, one per line, to stdout.
+fn print_rules(rules: &HashMap<String, (RuleType, Expr)>) {
+    let mut names: Vec<_> = rules.keys().collect();
+    names.sort();
+    for name in names {
+        println!(" {name}");
+    }
+}
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let rules = load_grammar();
+
+    if args.len() < 2 {
+        eprintln!("Usage: {} <rule_name>", args[0]);
+        eprintln!();
+        eprintln!("Available rules:");
+        print_rules(&rules);
+        std::process::exit(2);
+    }
+    let rule = &args[1];
+
+    if !rules.contains_key(rule) {
+        eprintln!("Unknown rule '{rule}'. Available rules:");
+        print_rules(&rules);
+        std::process::exit(1);
+    }
+
+    let seed = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .map(|d| d.as_nanos() as u64)
+        .unwrap_or(0)
+        ^ (std::process::id() as u64).wrapping_mul(0x9E3779B97F4A7C15);
+
+    let mut g = Gen::new(&rules, seed);
+    match g.generate(rule) {
+        Some(out) => print!("{out}"),
+        None => {
+            eprintln!("Failed to generate from rule '{rule}'");
+            std::process::exit(1);
+        }
+    }
+}
diff --git a/examples/pretty-print.rs b/examples/pretty-print.rs
new file mode 100644
index 0000000..a0ceacb
--- /dev/null
+++ b/examples/pretty-print.rs
@@ -0,0 +1,25 @@
+//! Parse a `.improv` file from stdin and print the formatted result to stdout.
+//!
+//! Usage:
+//!   cargo run --example pretty-print < file.improv
+//!   cargo run --example gen-grammar -- file | cargo run --example pretty-print
+
+use std::io::Read;
+
+fn main() {
+    let mut input = String::new();
+    if let Err(e) = std::io::stdin().read_to_string(&mut input) {
+        eprintln!("Failed to read stdin: {e}");
+        std::process::exit(1);
+    }
+
+    match improvise::persistence::parse_md(&input) {
+        Ok(model) => {
+            print!("{}", improvise::persistence::format_md(&model));
+        }
+        Err(e) => {
+            eprintln!("Parse error: {e:#}");
+            std::process::exit(1);
+        }
+    }
+}