bench: add profiling workload generator

Generates Forth-style command scripts that build a multi-dimensional
model and exercise the grid aggregation hot path via repeated
export-csv calls. Used for profiling with samply.

Usage:
  python3 bench/gen_workload.py --scale 5 > /tmp/workload.txt
  cargo build --profile profiling
  samply record ./target/profiling/improvise script /tmp/workload.txt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Edward Langley
2026-04-05 01:38:45 -07:00
parent 3d11daca18
commit 401a63f544

83
bench/gen_workload.py Normal file
View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Generate a profiling workload script for improvise.

The workload is written to stdout, one command per line. It declares five
categories (Region, Product, Month, Channel, Measure), populates them with
items, assigns axes, fills a pseudo-random subset of cells, adds three
formulas and finally emits a series of ``set-page`` / ``export-csv``
commands.

Usage:
    python3 bench/gen_workload.py [--scale N] > bench/large_workload.txt
    cargo build --release
    time ./target/release/improvise script bench/large_workload.txt

For flamegraph profiling:
    samply record ./target/release/improvise script bench/large_workload.txt

Options:
    --scale N     Size multiplier for regions, products and channels (>= 1).
    --density D   Probability (0.0-1.0) that a given cell is populated.
    --exports N   Number of export passes; 0 means one per month. Values
                  larger than the number of months cycle through the
                  months again.
"""
import argparse
import random

MEASURES = ("Revenue", "Cost", "Units")
N_MONTHS = 12
# Fixed seed so that repeated runs with the same options emit the same script.
SEED = 42


def generate_workload(scale=1, density=0.3, exports=0, seed=SEED):
    """Yield the lines of a workload script, without trailing newlines.

    Args:
        scale: Size multiplier; the model has ``5 * scale`` regions,
            ``8 * scale`` products and ``4 + scale`` channels.
        density: Probability that a (region, product, month, channel)
            combination gets its three measure cells populated.
        exports: Number of ``export-csv`` passes. Zero or a negative value
            means one pass per month. Values above the number of months
            wrap around and revisit the months in order.
        seed: Seed for the private random generator used to pick cells and
            values.

    Yields:
        One script line (command, comment or empty string) at a time.
    """
    # A private generator keeps the output independent of any other user of
    # the global ``random`` state.
    rng = random.Random(seed)

    n_regions = 5 * scale
    n_products = 8 * scale
    n_channels = 4 + scale

    regions = [f"R{i:03d}" for i in range(n_regions)]
    products = [f"P{i:03d}" for i in range(n_products)]
    months = [f"M{i:02d}" for i in range(1, N_MONTHS + 1)]
    channels = [f"Ch{i:02d}" for i in range(n_channels)]

    categories = [
        ("Region", regions),
        ("Product", products),
        ("Month", months),
        ("Channel", channels),
        ("Measure", MEASURES),
    ]

    potential = (
        n_regions * n_products * N_MONTHS * n_channels * len(MEASURES)
    )
    yield f"# Scale={scale}, Density={density}"
    yield (
        f"# {n_regions} regions × {n_products} products × {N_MONTHS} months"
        f" × {n_channels} channels × {len(MEASURES)} measures"
    )
    yield (
        f"# Potential cells: {potential},"
        f" Expected: ~{int(potential * density)}"
    )
    yield ""

    # All categories are declared before any item is added.
    for category, _ in categories:
        yield f"add-category {category}"
    for category, items in categories:
        for item in items:
            yield f"add-item {category} {item}"

    yield "set-axis Region row"
    yield "set-axis Product column"
    yield "set-axis Month page"
    yield "set-axis Channel none"
    yield "set-axis Measure none"

    n_cells = 0
    for r in regions:
        for p in products:
            for m in months:
                for c in channels:
                    if rng.random() < density:
                        rev = rng.randint(100, 10000)
                        # Cost is drawn from [50, rev], so it never
                        # exceeds revenue.
                        cost = rng.randint(50, rev)
                        units = rng.randint(1, 500)
                        coords = (
                            f"Region/{r} Product/{p} Month/{m} Channel/{c}"
                        )
                        yield f"set-cell {rev} {coords} Measure/Revenue"
                        yield f"set-cell {cost} {coords} Measure/Cost"
                        yield f"set-cell {units} {coords} Measure/Units"
                        n_cells += 3
    yield f"# Total cells: {n_cells}"

    yield 'add-formula Measure "Profit = Revenue - Cost"'
    yield 'add-formula Measure "Margin = Profit / Revenue"'
    yield 'add-formula Measure "AvgPrice = Revenue / Units"'

    n_exports = exports if exports > 0 else N_MONTHS
    for i in range(n_exports):
        # Wrap around so that asking for more passes than there are months
        # yields exactly the requested number of exports.
        month = months[i % N_MONTHS]
        yield (
            f"set-page Month {month} ."
            f" export-csv /tmp/improvise_bench_{i:02d}.csv"
        )
    yield "# Done"


def main(argv=None):
    """Parse command-line options and print the workload to stdout.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--scale", type=int, default=1,
                        help="Scale factor (1=small, 5=medium, 10=large)")
    parser.add_argument("--density", type=float, default=0.3,
                        help="Cell density (0.0-1.0)")
    parser.add_argument("--exports", type=int, default=0,
                        help="Number of export passes (0 = one per month)")
    args = parser.parse_args(argv)

    # Reject values that would silently produce an empty or misleading
    # workload instead of emitting a useless script.
    if args.scale < 1:
        parser.error("--scale must be at least 1")
    if not 0.0 <= args.density <= 1.0:
        parser.error("--density must be between 0.0 and 1.0")

    for line in generate_workload(args.scale, args.density, args.exports):
        print(line)


if __name__ == "__main__":
    main()