feat(examples): add grammar generation and pretty-printing utilities
Add new examples for generating sample .improv data based on the Pest grammar and pretty-printing existing .improv files. Co-Authored-By: fiddlerwoaroof/git-smart-commit (gemma-4-31B-it-UD-Q4_K_XL.gguf)
This commit is contained in:
315
examples/gen-grammar.rs
Normal file
315
examples/gen-grammar.rs
Normal file
@@ -0,0 +1,315 @@
|
||||
//! Generate a random valid example matching a rule from `improv.pest`.
|
||||
//!
|
||||
//! Usage:
|
||||
//! cargo run --example gen-grammar -- <rule_name>
|
||||
//!
|
||||
//! Examples:
|
||||
//! cargo run --example gen-grammar -- file
|
||||
//! cargo run --example gen-grammar -- category_section
|
||||
//! cargo run --example gen-grammar -- bare_name
|
||||
//!
|
||||
//! Each invocation generates one example seeded from the current time + PID.
|
||||
//!
|
||||
//! The generator adds constraints beyond what the grammar requires to produce
|
||||
//! realistic, round-trippable output:
|
||||
//! - bare names are drawn from a word pool instead of random letters
|
||||
//! - pipe_inner is never empty
|
||||
//! - rest_of_line always produces at least one character
|
||||
//! - repetitions (`*`) produce 1–4 items, not 0
|
||||
|
||||
use pest_meta::ast::{Expr, RuleType};
|
||||
use pest_meta::parser;
|
||||
use std::collections::HashMap;
|
||||
|
||||
const GRAMMAR: &str = include_str!("../src/persistence/improv.pest");
|
||||
|
||||
fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
|
||||
let pairs = parser::parse(parser::Rule::grammar_rules, GRAMMAR)
|
||||
.unwrap_or_else(|e| panic!("Bad grammar: {e}"));
|
||||
let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
|
||||
rules
|
||||
.into_iter()
|
||||
.map(|r| (r.name.clone(), (r.ty, r.expr)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ── Word pools for realistic output ─────────────────────────────────────────
|
||||
|
||||
/// Single-token words used wherever a bare (unquoted) name is generated.
const BARE_WORDS: &[&str] = &[
    // dimensions
    "Region", "Product", "Customer", "Channel", "Date",
    // compass points
    "North", "South", "East", "West",
    // measures
    "Revenue", "Cost", "Profit", "Margin",
    // products
    "Widgets", "Gadgets", "Sprockets",
    // quarters
    "Q1", "Q2", "Q3", "Q4",
    // months
    "Jan", "Feb", "Mar", "Apr",
    // companies
    "Acme", "Globex", "Initech", "Umbrella",
];

/// Multi-word or punctuated names used for the `pipe_inner` override.
const QUOTED_WORDS: &[&str] = &[
    "Total Revenue",
    "Net Income",
    "Gross Margin",
    "2025-01",
    "2025-02",
    "2025-03",
    "East Coast",
    "West Coast",
    "Acme Corp",
    "Globex Inc",
    "Cost of Goods",
    "Operating Expense",
];

/// Titles emitted by the `model_name` override.
const MODEL_NAMES: &[&str] = &[
    "Sales Report",
    "Budget 2025",
    "Quarterly Review",
    "Inventory Model",
    "Revenue Analysis",
    "Demo Model",
];

/// Names emitted after `## View: ` by the `view_section` override.
const VIEW_NAMES: &[&str] = &["Default", "Summary", "Detail", "By Region", "Monthly"];

/// Formula bodies emitted by the `formula_line` override.
const FORMULA_EXPRS: &[&str] = &[
    "Profit = Revenue - Cost",
    "Margin = Profit / Revenue",
    "Tax = Revenue * 0.1",
    "Total = SUM(Revenue)",
    "Net = Revenue - Cost - Tax",
];

/// Format specifiers emitted by the `format_line` override.
const FORMAT_STRINGS: &[&str] = &[",.0", ",.2f", ",.1f", ".0%"];
|
||||
|
||||
// ── PRNG ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Minimal xorshift pseudo-random generator holding a single 64-bit state word.
struct Xs64(u64);

impl Xs64 {
    /// Creates a generator from `seed`; a zero seed is replaced by 1 because
    /// an all-zero state would stay zero under the shift/xor steps.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 { 1 } else { seed };
        Xs64(state)
    }

    /// Advances the state by one 13/7/17 shift-xor round and returns it.
    fn next(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Returns the low eight bits of the next state.
    fn byte(&mut self) -> u8 {
        let word = self.next();
        // Truncating cast keeps exactly the low byte.
        word as u8
    }

    /// Returns one entry of `pool`, indexed by the next state modulo the
    /// pool length. Panics if `pool` is empty.
    fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str {
        let index = self.next() as usize % pool.len();
        pool[index]
    }
}
|
||||
|
||||
// ── Generator ───────────────────────────────────────────────────────────────
|
||||
|
||||
struct Gen<'g> {
|
||||
rules: &'g HashMap<String, (RuleType, Expr)>,
|
||||
rng: Xs64,
|
||||
}
|
||||
|
||||
impl<'g> Gen<'g> {
|
||||
fn new(rules: &'g HashMap<String, (RuleType, Expr)>, seed: u64) -> Self {
|
||||
Self {
|
||||
rules,
|
||||
rng: Xs64::new(seed),
|
||||
}
|
||||
}
|
||||
|
||||
fn pick(&mut self) -> u8 {
|
||||
self.rng.byte()
|
||||
}
|
||||
|
||||
/// Try a rule-specific override. Returns true if handled.
|
||||
fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool {
|
||||
match rule_name {
|
||||
"bare_name" => {
|
||||
out.push_str(self.rng.pick_from(BARE_WORDS));
|
||||
true
|
||||
}
|
||||
"pipe_inner" => {
|
||||
// Never empty
|
||||
out.push_str(self.rng.pick_from(QUOTED_WORDS));
|
||||
true
|
||||
}
|
||||
"rest_of_line" => {
|
||||
// Context-sensitive: produce something non-empty
|
||||
let word_count = 1 + self.pick() % 3;
|
||||
for i in 0..word_count {
|
||||
if i > 0 {
|
||||
out.push(' ');
|
||||
}
|
||||
out.push_str(self.rng.pick_from(BARE_WORDS));
|
||||
}
|
||||
true
|
||||
}
|
||||
"model_name" => {
|
||||
out.push_str("# ");
|
||||
out.push_str(self.rng.pick_from(MODEL_NAMES));
|
||||
out.push('\n');
|
||||
true
|
||||
}
|
||||
"format_line" => {
|
||||
out.push_str("format: ");
|
||||
out.push_str(self.rng.pick_from(FORMAT_STRINGS));
|
||||
out.push('\n');
|
||||
true
|
||||
}
|
||||
"formula_line" => {
|
||||
out.push_str("- ");
|
||||
out.push_str(self.rng.pick_from(FORMULA_EXPRS));
|
||||
out.push('\n');
|
||||
true
|
||||
}
|
||||
"number" => {
|
||||
let whole = 1 + self.rng.next() % 99999;
|
||||
if self.pick() % 3 == 0 {
|
||||
let frac = self.rng.next() % 100;
|
||||
out.push_str(&format!("{whole}.{frac:02}"));
|
||||
} else {
|
||||
out.push_str(&format!("{whole}"));
|
||||
}
|
||||
true
|
||||
}
|
||||
"axis_kind" => {
|
||||
let kinds = ["row", "column", "page", "none"];
|
||||
out.push_str(kinds[self.pick() as usize % kinds.len()]);
|
||||
true
|
||||
}
|
||||
"view_section" => {
|
||||
out.push_str("## View: ");
|
||||
out.push_str(self.rng.pick_from(VIEW_NAMES));
|
||||
out.push('\n');
|
||||
// Generate view_entry* from the grammar
|
||||
let count = 1 + self.pick() % 4;
|
||||
if let Some((_ty, expr)) = self.rules.get("view_entry") {
|
||||
let expr = expr.clone();
|
||||
for _ in 0..count {
|
||||
self.emit(&expr, out);
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn emit(&mut self, expr: &Expr, out: &mut String) {
|
||||
match expr {
|
||||
Expr::Str(s) => out.push_str(s),
|
||||
Expr::Range(lo, hi) => {
|
||||
let lo = lo.chars().next().unwrap() as u32;
|
||||
let hi = hi.chars().next().unwrap() as u32;
|
||||
let range = hi - lo + 1;
|
||||
let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
|
||||
out.push(ch);
|
||||
}
|
||||
Expr::Ident(name) => match name.as_str() {
|
||||
"ANY" => {
|
||||
let ch = (b'a' + self.pick() % 26) as char;
|
||||
out.push(ch);
|
||||
}
|
||||
"NEWLINE" => out.push('\n'),
|
||||
"SOI" | "EOI" => {}
|
||||
"ASCII_DIGIT" => {
|
||||
let d = (b'0' + self.pick() % 10) as char;
|
||||
out.push(d);
|
||||
}
|
||||
_ => {
|
||||
// Try override first, fall back to grammar walk
|
||||
if !self.try_override(name, out) {
|
||||
if let Some((_ty, expr)) = self.rules.get(name) {
|
||||
let expr = expr.clone();
|
||||
self.emit(&expr, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Expr::Seq(a, b) => {
|
||||
self.emit(a, out);
|
||||
self.emit(b, out);
|
||||
}
|
||||
Expr::Choice(a, b) => {
|
||||
let mut alts: Vec<&Expr> = vec![a.as_ref()];
|
||||
let mut cur = b.as_ref();
|
||||
while let Expr::Choice(l, r) = cur {
|
||||
alts.push(l.as_ref());
|
||||
cur = r.as_ref();
|
||||
}
|
||||
alts.push(cur);
|
||||
let idx = self.pick() as usize % alts.len();
|
||||
self.emit(alts[idx], out);
|
||||
}
|
||||
Expr::Opt(inner) => {
|
||||
if self.pick() % 3 != 0 {
|
||||
// ~66% chance of emitting
|
||||
self.emit(inner, out);
|
||||
}
|
||||
}
|
||||
Expr::Rep(inner) => {
|
||||
// 1–4 reps (never 0 — avoid degenerate empty output)
|
||||
let count = 1 + self.pick() % 4;
|
||||
for _ in 0..count {
|
||||
self.emit(inner, out);
|
||||
}
|
||||
}
|
||||
Expr::RepOnce(inner) => {
|
||||
let count = 1 + self.pick() % 4;
|
||||
for _ in 0..count {
|
||||
self.emit(inner, out);
|
||||
}
|
||||
}
|
||||
Expr::NegPred(_) | Expr::PosPred(_) => {}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn generate(&mut self, rule_name: &str) -> Option<String> {
|
||||
// Check override first (for top-level rule invocation)
|
||||
let mut out = String::new();
|
||||
if self.try_override(rule_name, &mut out) {
|
||||
return Some(out);
|
||||
}
|
||||
let (_ty, expr) = self.rules.get(rule_name)?.clone();
|
||||
self.emit(&expr, &mut out);
|
||||
Some(out)
|
||||
}
|
||||
}
|
||||
|
||||
/// Lists every rule name in `rules`, sorted, one per line.
///
/// The list is written to stderr because it is only shown next to usage and
/// error messages, which also go to stderr. Keeping it off stdout means a
/// failed invocation never feeds the list into a downstream pipe.
fn print_rules(rules: &HashMap<String, (RuleType, Expr)>) {
    let mut names: Vec<_> = rules.keys().collect();
    // Map keys are unique, so an unstable sort gives the same order.
    names.sort_unstable();
    for name in names {
        eprintln!(" {name}");
    }
}
|
||||
|
||||
fn main() {
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
let rules = load_grammar();
|
||||
|
||||
if args.len() < 2 {
|
||||
eprintln!("Usage: {} <rule_name>", args[0]);
|
||||
eprintln!();
|
||||
eprintln!("Available rules:");
|
||||
print_rules(&rules);
|
||||
std::process::exit(2);
|
||||
}
|
||||
let rule = &args[1];
|
||||
|
||||
if !rules.contains_key(rule) {
|
||||
eprintln!("Unknown rule '{rule}'. Available rules:");
|
||||
print_rules(&rules);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let seed = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos() as u64)
|
||||
.unwrap_or(0)
|
||||
^ (std::process::id() as u64).wrapping_mul(0x9E3779B97F4A7C15);
|
||||
|
||||
let mut g = Gen::new(&rules, seed);
|
||||
match g.generate(rule) {
|
||||
Some(out) => print!("{out}"),
|
||||
None => {
|
||||
eprintln!("Failed to generate from rule '{rule}'");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
25
examples/pretty-print.rs
Normal file
25
examples/pretty-print.rs
Normal file
@@ -0,0 +1,25 @@
|
||||
//! Parse a `.improv` file from stdin and print the formatted result to stdout.
|
||||
//!
|
||||
//! Usage:
|
||||
//! cargo run --example pretty-print < file.improv
|
||||
//! cargo run --example gen-grammar -- file | cargo run --example pretty-print
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
fn main() {
|
||||
let mut input = String::new();
|
||||
if let Err(e) = std::io::stdin().read_to_string(&mut input) {
|
||||
eprintln!("Failed to read stdin: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
match improvise::persistence::parse_md(&input) {
|
||||
Ok(model) => {
|
||||
print!("{}", improvise::persistence::format_md(&model));
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Parse error: {e:#}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user