Relocate the two I/O module trees into the improvise-io sub-crate scaffolded in the previous commit:

    git mv src/persistence -> crates/improvise-io/src/persistence
    git mv src/import -> crates/improvise-io/src/import

The grammar file `improv.pest` moves alongside `persistence/mod.rs`; the `#[grammar = "persistence/improv.pest"]` attribute resolves relative to the new crate root and keeps working unchanged.

No path edits inside the moved code: the `crate::model::*`, `crate::view::*`, `crate::workbook::*`, `crate::format::*`, and `crate::formula::*` imports inside persistence and import all continue to resolve because improvise-io's lib.rs re-exports those modules from improvise-core and improvise-formula, mirroring the pattern improvise-core uses for `formula`.

Verified no `crate::ui::*`, `crate::command::*`, `crate::draw::*` imports exist in the moved code (per improvise-8zh acceptance criterion #3).

Main-crate `src/lib.rs` now re-exports `import` and `persistence` from improvise-io, keeping every `crate::persistence::*` and `crate::import::*` path in the 4 consumer files (ui/app.rs, ui/effect.rs, ui/import_wizard_ui.rs, main.rs) resolving unchanged — no downstream edits needed.

`examples/gen-grammar.rs` had `include_str!("../src/persistence/improv.pest")`; updated the relative path to the new location under `crates/improvise-io/src/persistence/`.

Verification:
- cargo check --workspace --examples: clean
- cargo test --workspace: 616 passing (219 main + 190 core + 65 formula + 142 io)
- cargo clippy --workspace --tests: clean
- cargo build -p improvise-io: standalone build succeeds, confirming no UI/command leakage into the IO crate (improvise-8zh acceptance #2, #3)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
345 lines
10 KiB
Rust
345 lines
10 KiB
Rust
//! Generate a random valid example matching a rule from `improv.pest`.
|
||
//!
|
||
//! Usage:
|
||
//! cargo run --example gen-grammar -- <rule_name>
|
||
//!
|
||
//! Examples:
|
||
//! cargo run --example gen-grammar -- file
|
||
//! cargo run --example gen-grammar -- category_section
|
||
//! cargo run --example gen-grammar -- bare_name
|
||
//!
|
||
//! Each invocation generates one example seeded from the current time + PID.
|
||
//!
|
||
//! The generator adds constraints beyond what the grammar requires to produce
|
||
//! realistic, round-trippable output:
|
||
//! - bare names are drawn from a word pool instead of random letters
|
||
//! - pipe_inner is never empty
|
||
//! - rest_of_line always produces at least one character
|
||
//! - repetitions (`*`) produce 1–4 items, not 0
|
||
|
||
use pest_meta::ast::{Expr, RuleType};
|
||
use pest_meta::parser;
|
||
use std::collections::HashMap;
|
||
|
||
const GRAMMAR: &str =
|
||
include_str!("../crates/improvise-io/src/persistence/improv.pest");
|
||
|
||
fn load_grammar() -> HashMap<String, (RuleType, Expr)> {
|
||
let pairs = parser::parse(parser::Rule::grammar_rules, GRAMMAR)
|
||
.unwrap_or_else(|e| panic!("Bad grammar: {e}"));
|
||
let rules = parser::consume_rules(pairs).unwrap_or_else(|e| panic!("{e:?}"));
|
||
rules
|
||
.into_iter()
|
||
.map(|r| (r.name.clone(), (r.ty, r.expr)))
|
||
.collect()
|
||
}
|
||
|
||
// ── Word pools for realistic output ─────────────────────────────────────────
|
||
|
||
/// Single-word names; the `bare_name` and `rest_of_line` overrides draw from this pool.
const BARE_WORDS: &[&str] = &[
    "Region",
    "Product",
    "Customer",
    "Channel",
    "Date",
    "North",
    "South",
    "East",
    "West",
    "Revenue",
    "Cost",
    "Profit",
    "Margin",
    "Widgets",
    "Gadgets",
    "Sprockets",
    "Q1",
    "Q2",
    "Q3",
    "Q4",
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "Acme",
    "Globex",
    "Initech",
    "Umbrella",
];

/// Names containing spaces or dashes; the `pipe_inner` override draws from this pool.
const QUOTED_WORDS: &[&str] = &[
    "Total Revenue",
    "Net Income",
    "Gross Margin",
    "2025-01",
    "2025-02",
    "2025-03",
    "East Coast",
    "West Coast",
    "Acme Corp",
    "Globex Inc",
    "Cost of Goods",
    "Operating Expense",
];

/// Titles emitted after `# ` by the `model_name` override.
const MODEL_NAMES: &[&str] = &[
    "Sales Report",
    "Budget 2025",
    "Quarterly Review",
    "Inventory Model",
    "Revenue Analysis",
    "Demo Model",
];

/// Names emitted after `## View: ` by the `view_section` override.
const VIEW_NAMES: &[&str] = &["Default", "Summary", "Detail", "By Region", "Monthly"];

/// Formula texts emitted after `- ` by the `formula_line` override.
const FORMULA_EXPRS: &[&str] = &[
    "Profit = Revenue - Cost",
    "Margin = Profit / Revenue",
    "Tax = Revenue * 0.1",
    "Total = SUM(Revenue)",
    "Net = Revenue - Cost - Tax",
];

/// Format specifiers emitted after `format: ` by the `format_line` override.
const FORMAT_STRINGS: &[&str] = &[",.0", ",.2f", ",.1f", ".0%"];
|
||
|
||
// ── PRNG ────────────────────────────────────────────────────────────────────
|
||
|
||
/// Small shift/xor pseudo-random generator holding a single 64-bit state word.
struct Xs64(u64);

impl Xs64 {
    /// Builds a generator from `seed`, substituting 1 for a zero seed.
    fn new(seed: u64) -> Self {
        // A zero state would stay zero through every shift/xor step below.
        let state = if seed == 0 { 1 } else { seed };
        Self(state)
    }

    /// Advances the state by one step and returns the new state word.
    fn next(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Advances the state and returns the lowest byte of the new word.
    fn byte(&mut self) -> u8 {
        self.next().to_le_bytes()[0]
    }

    /// Advances the state and returns the pool entry it selects.
    ///
    /// # Panics
    ///
    /// Panics if `pool` is empty.
    fn pick_from<'a>(&mut self, pool: &[&'a str]) -> &'a str {
        let idx = self.next() as usize % pool.len();
        pool[idx]
    }
}
|
||
|
||
// ── Generator ───────────────────────────────────────────────────────────────
|
||
|
||
struct Gen<'g> {
|
||
rules: &'g HashMap<String, (RuleType, Expr)>,
|
||
rng: Xs64,
|
||
}
|
||
|
||
impl<'g> Gen<'g> {
|
||
fn new(rules: &'g HashMap<String, (RuleType, Expr)>, seed: u64) -> Self {
|
||
Self {
|
||
rules,
|
||
rng: Xs64::new(seed),
|
||
}
|
||
}
|
||
|
||
fn pick(&mut self) -> u8 {
|
||
self.rng.byte()
|
||
}
|
||
|
||
/// Try a rule-specific override. Returns true if handled.
|
||
fn try_override(&mut self, rule_name: &str, out: &mut String) -> bool {
|
||
match rule_name {
|
||
"bare_name" => {
|
||
out.push_str(self.rng.pick_from(BARE_WORDS));
|
||
true
|
||
}
|
||
"pipe_inner" => {
|
||
// Never empty
|
||
out.push_str(self.rng.pick_from(QUOTED_WORDS));
|
||
true
|
||
}
|
||
"rest_of_line" => {
|
||
// Context-sensitive: produce something non-empty
|
||
let word_count = 1 + self.pick() % 3;
|
||
for i in 0..word_count {
|
||
if i > 0 {
|
||
out.push(' ');
|
||
}
|
||
out.push_str(self.rng.pick_from(BARE_WORDS));
|
||
}
|
||
true
|
||
}
|
||
"model_name" => {
|
||
out.push_str("# ");
|
||
out.push_str(self.rng.pick_from(MODEL_NAMES));
|
||
out.push('\n');
|
||
true
|
||
}
|
||
"format_line" => {
|
||
out.push_str("format: ");
|
||
out.push_str(self.rng.pick_from(FORMAT_STRINGS));
|
||
out.push('\n');
|
||
true
|
||
}
|
||
"formula_line" => {
|
||
out.push_str("- ");
|
||
out.push_str(self.rng.pick_from(FORMULA_EXPRS));
|
||
out.push('\n');
|
||
true
|
||
}
|
||
"number" => {
|
||
let whole = 1 + self.rng.next() % 99999;
|
||
if self.pick().is_multiple_of(3) {
|
||
let frac = self.rng.next() % 100;
|
||
out.push_str(&format!("{whole}.{frac:02}"));
|
||
} else {
|
||
out.push_str(&format!("{whole}"));
|
||
}
|
||
true
|
||
}
|
||
"axis_kind" => {
|
||
let kinds = ["row", "column", "page", "none"];
|
||
out.push_str(kinds[self.pick() as usize % kinds.len()]);
|
||
true
|
||
}
|
||
"view_section" => {
|
||
out.push_str("## View: ");
|
||
out.push_str(self.rng.pick_from(VIEW_NAMES));
|
||
out.push('\n');
|
||
// Generate view_entry* from the grammar
|
||
let count = 1 + self.pick() % 4;
|
||
if let Some((_ty, expr)) = self.rules.get("view_entry") {
|
||
let expr = expr.clone();
|
||
for _ in 0..count {
|
||
self.emit(&expr, out);
|
||
}
|
||
}
|
||
true
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
fn emit(&mut self, expr: &Expr, out: &mut String) {
|
||
match expr {
|
||
Expr::Str(s) => out.push_str(s),
|
||
Expr::Range(lo, hi) => {
|
||
let lo = lo.chars().next().unwrap() as u32;
|
||
let hi = hi.chars().next().unwrap() as u32;
|
||
let range = hi - lo + 1;
|
||
let ch = char::from_u32(lo + (self.pick() as u32 % range)).unwrap();
|
||
out.push(ch);
|
||
}
|
||
Expr::Ident(name) => match name.as_str() {
|
||
"ANY" => {
|
||
let ch = (b'a' + self.pick() % 26) as char;
|
||
out.push(ch);
|
||
}
|
||
"NEWLINE" => out.push('\n'),
|
||
"SOI" | "EOI" => {}
|
||
"ASCII_DIGIT" => {
|
||
let d = (b'0' + self.pick() % 10) as char;
|
||
out.push(d);
|
||
}
|
||
_ => {
|
||
// Try override first, fall back to grammar walk
|
||
if !self.try_override(name, out)
|
||
&& let Some((_ty, expr)) = self.rules.get(name)
|
||
{
|
||
let expr = expr.clone();
|
||
self.emit(&expr, out);
|
||
}
|
||
}
|
||
},
|
||
Expr::Seq(a, b) => {
|
||
self.emit(a, out);
|
||
self.emit(b, out);
|
||
}
|
||
Expr::Choice(a, b) => {
|
||
let mut alts: Vec<&Expr> = vec![a.as_ref()];
|
||
let mut cur = b.as_ref();
|
||
while let Expr::Choice(l, r) = cur {
|
||
alts.push(l.as_ref());
|
||
cur = r.as_ref();
|
||
}
|
||
alts.push(cur);
|
||
let idx = self.pick() as usize % alts.len();
|
||
self.emit(alts[idx], out);
|
||
}
|
||
Expr::Opt(inner) => {
|
||
if !self.pick().is_multiple_of(3) {
|
||
// ~66% chance of emitting
|
||
self.emit(inner, out);
|
||
}
|
||
}
|
||
Expr::Rep(inner) => {
|
||
// 1–4 reps (never 0 — avoid degenerate empty output)
|
||
let count = 1 + self.pick() % 4;
|
||
for _ in 0..count {
|
||
self.emit(inner, out);
|
||
}
|
||
}
|
||
Expr::RepOnce(inner) => {
|
||
let count = 1 + self.pick() % 4;
|
||
for _ in 0..count {
|
||
self.emit(inner, out);
|
||
}
|
||
}
|
||
Expr::NegPred(_) | Expr::PosPred(_) => {}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
fn generate(&mut self, rule_name: &str) -> Option<String> {
|
||
// Check override first (for top-level rule invocation)
|
||
let mut out = String::new();
|
||
if self.try_override(rule_name, &mut out) {
|
||
return Some(out);
|
||
}
|
||
let (_ty, expr) = self.rules.get(rule_name)?.clone();
|
||
self.emit(&expr, &mut out);
|
||
Some(out)
|
||
}
|
||
}
|
||
|
||
/// Writes every rule name in `rules` to stderr, sorted, one per indented line.
///
/// The list goes to stderr because it is printed only beneath the usage and
/// unknown-rule messages, which are themselves written to stderr; this keeps
/// stdout reserved for generated examples.
fn print_rules(rules: &HashMap<String, (RuleType, Expr)>) {
    let mut names: Vec<&str> = rules.keys().map(String::as_str).collect();
    names.sort_unstable();
    for name in names {
        eprintln!(" {name}");
    }
}
|
||
|
||
fn main() {
|
||
let args: Vec<String> = std::env::args().collect();
|
||
let rules = load_grammar();
|
||
|
||
if args.len() < 2 {
|
||
eprintln!("Usage: {} <rule_name>", args[0]);
|
||
eprintln!();
|
||
eprintln!("Available rules:");
|
||
print_rules(&rules);
|
||
std::process::exit(2);
|
||
}
|
||
let rule = &args[1];
|
||
|
||
if !rules.contains_key(rule) {
|
||
eprintln!("Unknown rule '{rule}'. Available rules:");
|
||
print_rules(&rules);
|
||
std::process::exit(1);
|
||
}
|
||
|
||
let seed = std::time::SystemTime::now()
|
||
.duration_since(std::time::UNIX_EPOCH)
|
||
.map(|d| d.as_nanos() as u64)
|
||
.unwrap_or(0)
|
||
^ (std::process::id() as u64).wrapping_mul(0x9E3779B97F4A7C15);
|
||
|
||
let mut g = Gen::new(&rules, seed);
|
||
match g.generate(rule) {
|
||
Some(out) => print!("{out}"),
|
||
None => {
|
||
eprintln!("Failed to generate from rule '{rule}'");
|
||
std::process::exit(1);
|
||
}
|
||
}
|
||
}
|