Files
improvise/examples/gen-grammar.rs
Edward Langley 5807464fc7 refactor(io): move persistence and import into improvise-io (improvise-8zh)
Relocate the two I/O module trees into the improvise-io sub-crate
scaffolded in the previous commit:

  git mv src/persistence -> crates/improvise-io/src/persistence
  git mv src/import      -> crates/improvise-io/src/import

The grammar file `improv.pest` moves alongside `persistence/mod.rs`;
the `#[grammar = "persistence/improv.pest"]` attribute resolves relative
to the new crate root and keeps working unchanged.

No path edits inside the moved code: the `crate::model::*`,
`crate::view::*`, `crate::workbook::*`, `crate::format::*`, and
`crate::formula::*` imports inside persistence and import all continue
to resolve because improvise-io's lib.rs re-exports those modules from
improvise-core and improvise-formula, mirroring the pattern improvise-core
uses for `formula`. Verified no `crate::ui::*`, `crate::command::*`,
`crate::draw::*` imports exist in the moved code (per improvise-8zh
acceptance criterion #3).

Main-crate `src/lib.rs` now re-exports `import` and `persistence` from
improvise-io, keeping every `crate::persistence::*` and `crate::import::*`
path in the 4 consumer files (ui/app.rs, ui/effect.rs,
ui/import_wizard_ui.rs, main.rs) resolving unchanged — no downstream
edits needed.

`examples/gen-grammar.rs` had `include_str!("../src/persistence/improv.pest")`;
updated the relative path to the new location under
`crates/improvise-io/src/persistence/`.

Verification:
- cargo check --workspace --examples: clean
- cargo test --workspace: 616 passing (219 main + 190 core + 65 formula + 142 io)
- cargo clippy --workspace --tests: clean
- cargo build -p improvise-io: standalone build succeeds, confirming no
  UI/command leakage into the IO crate (improvise-8zh acceptance #2, #3)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 23:08:00 -07:00

345 lines
10 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Generate a random valid example matching a rule from `improv.pest`.
//!
//! Usage:
//! cargo run --example gen-grammar -- <rule_name>
//!
//! Examples:
//! cargo run --example gen-grammar -- file
//! cargo run --example gen-grammar -- category_section
//! cargo run --example gen-grammar -- bare_name
//!
//! Each invocation generates one example seeded from the current time + PID.
//!
//! The generator adds constraints beyond what the grammar requires to produce
//! realistic, round-trippable output:
//! - bare names are drawn from a word pool instead of random letters
//! - pipe_inner is never empty
//! - rest_of_line always produces at least one character
//! - repetitions (`*`) produce 1-4 items, not 0
use pest_meta::ast::{Expr, RuleType};
use pest_meta::parser;
use std::collections::HashMap;
/// Source text of the `improv.pest` grammar, embedded into the binary at
/// compile time by `include_str!` (the path is relative to this file).
const GRAMMAR: &str =
include_str!("../crates/improvise-io/src/persistence/improv.pest");
/// Parse the embedded grammar and index its rules by name.
///
/// Returns a map from rule name to that rule's type and expression tree.
///
/// # Panics
///
/// Panics if `GRAMMAR` fails to parse or its rules cannot be converted into
/// the AST. The grammar is embedded at compile time, so either failure means
/// the grammar file itself is broken rather than that user input was bad.
fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
    let pairs = parser::parse(parser::Rule::grammar_rules, GRAMMAR)
        .unwrap_or_else(|e| panic!("Bad grammar: {e}"));
    let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
    rules
        .into_iter()
        // `into_iter` hands over each rule by value, so its fields can be
        // moved straight into the map entry without cloning the name.
        .map(|r| (r.name, (r.ty, r.expr)))
        .collect()
}
// ── Word pools for realistic output ─────────────────────────────────────────
/// Single-word names without spaces. `Gen::try_override` draws from this
/// pool for the `bare_name` rule and for each word of `rest_of_line`.
const BARE_WORDS: &[&str] = &[
"Region",
"Product",
"Customer",
"Channel",
"Date",
"North",
"South",
"East",
"West",
"Revenue",
"Cost",
"Profit",
"Margin",
"Widgets",
"Gadgets",
"Sprockets",
"Q1",
"Q2",
"Q3",
"Q4",
"Jan",
"Feb",
"Mar",
"Apr",
"Acme",
"Globex",
"Initech",
"Umbrella",
];
/// Names containing spaces or punctuation. `Gen::try_override` draws from
/// this pool for the `pipe_inner` rule so that it is never empty.
// NOTE(review): presumably these are the names that must be pipe-quoted in
// the file format — confirm against `improv.pest`.
const QUOTED_WORDS: &[&str] = &[
"Total Revenue",
"Net Income",
"Gross Margin",
"2025-01",
"2025-02",
"2025-03",
"East Coast",
"West Coast",
"Acme Corp",
"Globex Inc",
"Cost of Goods",
"Operating Expense",
];
/// Titles emitted after the `# ` prefix by the `model_name` override.
const MODEL_NAMES: &[&str] = &[
"Sales Report",
"Budget 2025",
"Quarterly Review",
"Inventory Model",
"Revenue Analysis",
"Demo Model",
];
/// View titles emitted after the `## View: ` prefix by the `view_section` override.
const VIEW_NAMES: &[&str] = &["Default", "Summary", "Detail", "By Region", "Monthly"];
/// Formula texts emitted after the `- ` prefix by the `formula_line` override.
const FORMULA_EXPRS: &[&str] = &[
"Profit = Revenue - Cost",
"Margin = Profit / Revenue",
"Tax = Revenue * 0.1",
"Total = SUM(Revenue)",
"Net = Revenue - Cost - Tax",
];
/// Number-format specifiers emitted after the `format: ` prefix by the `format_line` override.
const FORMAT_STRINGS: &[&str] = &[",.0", ",.2f", ",.1f", ".0%"];
// ── PRNG ────────────────────────────────────────────────────────────────────
/// Minimal xorshift64 pseudo-random generator (shift triple 13 / 7 / 17).
///
/// Not cryptographically secure; it only needs to vary the generated
/// examples from run to run.
struct Xs64(u64);

impl Xs64 {
    /// Build a generator from `seed`, substituting 1 for a zero seed because
    /// a zero xorshift state can only ever produce zero.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 { 1 } else { seed };
        Xs64(state)
    }

    /// Advance the state by one xorshift step and return the new state.
    fn next(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Return the low eight bits of the next output.
    fn byte(&mut self) -> u8 {
        // The truncating cast keeps exactly the low byte.
        self.next() as u8
    }

    /// Pick one entry of `pool`, indexed by the next output modulo the pool
    /// length.
    ///
    /// Panics if `pool` is empty.
    fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str {
        let idx = self.next() as usize % pool.len();
        pool[idx]
    }
}
// ── Generator ───────────────────────────────────────────────────────────────
/// Walks grammar expressions and appends randomly chosen matching text to an
/// output string.
struct Gen<'g> {
/// Grammar rules keyed by rule name; borrowed for the generator's lifetime.
rules: &'g HashMap<String, (RuleType, Expr)>,
/// Source of randomness for every choice the generator makes.
rng: Xs64,
}
impl<'g> Gen<'g> {
/// Create a generator over `rules` whose random choices derive from `seed`.
fn new(rules: &'g HashMap<String, (RuleType, Expr)>, seed: u64) -> Self {
    let rng = Xs64::new(seed);
    Self { rules, rng }
}
/// Draw one random byte from the generator's PRNG.
fn pick(&mut self) -> u8 {
self.rng.byte()
}
/// Emit hand-written output for rules where walking the grammar would give
/// unrealistic or degenerate text.
///
/// Appends to `out` and returns `true` when `rule_name` has an override.
/// Returns `false`, leaving `out` untouched, when it does not; the caller is
/// then expected to fall back to the rule's grammar expression.
fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool {
    match rule_name {
        "bare_name" => out.push_str(self.rng.pick_from(BARE_WORDS)),
        // Never empty.
        "pipe_inner" => out.push_str(self.rng.pick_from(QUOTED_WORDS)),
        "rest_of_line" => {
            // Context-sensitive in the grammar: always produce something
            // non-empty, here one to three space-separated words.
            let word_count = 1 + self.pick() % 3;
            for i in 0..word_count {
                if i > 0 {
                    out.push(' ');
                }
                out.push_str(self.rng.pick_from(BARE_WORDS));
            }
        }
        "model_name" => {
            out.push_str("# ");
            out.push_str(self.rng.pick_from(MODEL_NAMES));
            out.push('\n');
        }
        "format_line" => {
            out.push_str("format: ");
            out.push_str(self.rng.pick_from(FORMAT_STRINGS));
            out.push('\n');
        }
        "formula_line" => {
            out.push_str("- ");
            out.push_str(self.rng.pick_from(FORMULA_EXPRS));
            out.push('\n');
        }
        "number" => {
            // Whole part in 1..=99999; roughly one time in three a
            // two-digit fraction is appended.
            let whole = 1 + self.rng.next() % 99999;
            if self.pick().is_multiple_of(3) {
                let frac = self.rng.next() % 100;
                out.push_str(&format!("{whole}.{frac:02}"));
            } else {
                out.push_str(&whole.to_string());
            }
        }
        "axis_kind" => {
            // Same helper as the other pools; with four entries the chosen
            // index is identical to taking a random byte modulo 4.
            out.push_str(self.rng.pick_from(&["row", "column", "page", "none"]));
        }
        "view_section" => {
            out.push_str("## View: ");
            out.push_str(self.rng.pick_from(VIEW_NAMES));
            out.push('\n');
            // One to four `view_entry` items, each generated from the grammar.
            let count = 1 + self.pick() % 4;
            // Copy the `&'g` map reference out of `self` so the looked-up
            // expression borrows from the map rather than from `self`; this
            // lets `emit` take `&mut self` without cloning the expression.
            let rules = self.rules;
            if let Some((_ty, entry)) = rules.get("view_entry") {
                for _ in 0..count {
                    self.emit(entry, out);
                }
            }
        }
        _ => return false,
    }
    true
}
/// Append text matching `expr` to `out`, making random choices via the PRNG.
///
/// Handling per expression kind:
/// - string literals are copied verbatim; a range yields one character
/// - `ANY` yields a lowercase ASCII letter, `ASCII_DIGIT` a digit, `NEWLINE`
///   a `\n`, and `SOI` / `EOI` nothing
/// - any other identifier goes through `try_override` first, then the named
///   rule's expression; names that are neither emit nothing
/// - a choice picks uniformly among all alternatives of the whole `|` chain
/// - an optional expression is emitted about two times in three
/// - `*` and `+` repetitions are emitted 1-4 times
/// - predicates and every other expression kind emit nothing
///
/// # Panics
///
/// Panics if a range endpoint is an empty string or if the chosen code point
/// is not a valid `char`.
fn emit(&mut self, expr: &Expr, out: &mut String) {
    match expr {
        Expr::Str(s) => out.push_str(s),
        Expr::Range(lo, hi) => {
            let lo = lo.chars().next().unwrap() as u32;
            let hi = hi.chars().next().unwrap() as u32;
            let range = hi - lo + 1;
            // `pick` yields a byte, so at most the first 256 characters of
            // a range are reachable.
            let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
            out.push(ch);
        }
        Expr::Ident(name) => match name.as_str() {
            "ANY" => {
                let ch = (b'a' + self.pick() % 26) as char;
                out.push(ch);
            }
            "NEWLINE" => out.push('\n'),
            "SOI" | "EOI" => {}
            "ASCII_DIGIT" => {
                let d = (b'0' + self.pick() % 10) as char;
                out.push(d);
            }
            _ => {
                // Copy the `&'g` map reference out of `self` so the
                // looked-up expression borrows from the map, not from
                // `self`; the recursive call then needs no clone.
                let rules = self.rules;
                // Try the override first, fall back to the grammar walk.
                if !self.try_override(name, out)
                    && let Some((_ty, rule_expr)) = rules.get(name)
                {
                    self.emit(rule_expr, out);
                }
            }
        },
        Expr::Seq(a, b) => {
            self.emit(a, out);
            self.emit(b, out);
        }
        Expr::Choice(..) => {
            // Gather every alternative of the chain in source order,
            // whichever side the tree nests on. pest_meta's parser treats
            // `|` as left-associative, so following only the right-hand
            // side would see `a | b | c` as two alternatives and heavily
            // favour the last one.
            let mut alts: Vec<&Expr> = Vec::new();
            let mut pending: Vec<&Expr> = vec![expr];
            while let Some(node) = pending.pop() {
                if let Expr::Choice(lhs, rhs) = node {
                    // Push right first so the left side is visited first.
                    pending.push(rhs.as_ref());
                    pending.push(lhs.as_ref());
                } else {
                    alts.push(node);
                }
            }
            let idx = self.pick() as usize % alts.len();
            self.emit(alts[idx], out);
        }
        Expr::Opt(inner) => {
            if !self.pick().is_multiple_of(3) {
                // ~66% chance of emitting
                self.emit(inner, out);
            }
        }
        Expr::Rep(inner) | Expr::RepOnce(inner) => {
            // 1-4 reps (never 0, even for `*`, to avoid degenerate empty
            // output).
            let count = 1 + self.pick() % 4;
            for _ in 0..count {
                self.emit(inner, out);
            }
        }
        // Predicates consume no input, so they contribute no text.
        Expr::NegPred(_) | Expr::PosPred(_) => {}
        // Remaining expression kinds are not generated.
        _ => {}
    }
}
/// Generate one example for `rule_name`.
///
/// A rule-specific override takes precedence; otherwise the rule's grammar
/// expression is walked. Returns `None` when the rule has no override and
/// is not present in the grammar.
fn generate(&mut self, rule_name: &str) -> Option<String> {
    let mut out = String::new();
    // Check the override first (for top-level rule invocation).
    if self.try_override(rule_name, &mut out) {
        return Some(out);
    }
    // Copy the `&'g` map reference out of `self` so the looked-up expression
    // borrows from the map rather than from `self`; no clone is needed to
    // call `emit`.
    let rules = self.rules;
    let (_ty, expr) = rules.get(rule_name)?;
    self.emit(expr, &mut out);
    Some(out)
}
}
/// Print every rule name in `rules` to stdout, one per line, in sorted order.
fn print_rules(rules: &HashMap<String, (RuleType, Expr)>) {
    let mut sorted: Vec<&String> = rules.keys().collect();
    sorted.sort();
    sorted.iter().for_each(|name| println!(" {name}"));
}
/// Entry point: read the rule name from the command line, generate one
/// example for it, and print the example to stdout.
///
/// Exits with status 2 when no rule name is given, and with status 1 when
/// the rule is unknown or generation fails.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    // Loaded before the argument checks so the error paths can list rules.
    let rules = load_grammar();

    let Some(rule) = args.get(1) else {
        eprintln!("Usage: {} <rule_name>", args[0]);
        eprintln!();
        eprintln!("Available rules:");
        print_rules(&rules);
        std::process::exit(2);
    };

    if !rules.contains_key(rule) {
        eprintln!("Unknown rule '{rule}'. Available rules:");
        print_rules(&rules);
        std::process::exit(1);
    }

    // Seed = wall-clock nanoseconds (truncated to 64 bits, 0 if the clock is
    // before the epoch) XOR the process id scrambled by a multiplier.
    let clock_bits = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |since_epoch| since_epoch.as_nanos() as u64);
    let pid_bits = u64::from(std::process::id()).wrapping_mul(0x9E3779B97F4A7C15);
    let seed = clock_bits ^ pid_bits;

    let mut g = Gen::new(&rules, seed);
    let Some(out) = g.generate(rule) else {
        eprintln!("Failed to generate from rule '{rule}'");
        std::process::exit(1);
    };
    print!("{out}");
}