Files
improvise/bench/gen_workload.py
Edward Langley 401a63f544 bench: add profiling workload generator
Generates Forth-style command scripts that build a multi-dimensional
model and exercise the grid aggregation hot path via repeated
export-csv calls. Used for profiling with samply.

Usage:
  python3 bench/gen_workload.py --scale 5 > /tmp/workload.txt
  cargo build --profile profiling
  samply record ./target/profiling/improvise script /tmp/workload.txt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 01:38:45 -07:00

84 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Generate a profiling workload script for improvise.
Usage:
    python3 bench/gen_workload.py [--scale N] > bench/large_workload.txt
    cargo build --release
    time ./target/release/improvise script bench/large_workload.txt

For flamegraph profiling:
    samply record ./target/release/improvise script bench/large_workload.txt
"""
import argparse
import itertools
import random
# The month dimension has a fixed size; the other dimensions grow with --scale.
N_MONTHS = 12
MEASURES = ("Revenue", "Cost", "Units")


def parse_args(argv=None):
    """Parse the command-line options of the workload generator.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``scale``, ``density``, ``exports``
        and ``seed`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--scale", type=int, default=1,
                        help="Scale factor (1=small, 5=medium, 10=large)")
    parser.add_argument("--density", type=float, default=0.3,
                        help="Cell density (0.0-1.0)")
    parser.add_argument("--exports", type=int, default=0,
                        help="Number of export passes (0 = one per month)")
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed for the generated cell values")
    return parser.parse_args(argv)


def generate_workload(scale=1, density=0.3, exports=0, seed=42):
    """Yield the lines of a workload script, one command or comment per line.

    The script declares five categories (Region, Product, Month, Channel,
    Measure), fills a random subset of the cells, adds three formulas and
    finishes with a series of ``set-page`` / ``export-csv`` passes.

    Args:
        scale: Scale factor; yields ``5 * scale`` regions, ``8 * scale``
            products and ``4 + scale`` channels.
        density: Probability that a given (region, product, month, channel)
            coordinate receives values.
        exports: Number of export passes. Values <= 0 mean one pass per
            month. Values above the month count wrap around the months.
        seed: Seed of the private random generator; the same seed always
            produces the same script.

    Yields:
        Script lines without trailing newlines.
    """
    # A private generator keeps the output reproducible without reseeding the
    # process-wide `random` state.
    rng = random.Random(seed)

    n_regions = 5 * scale
    n_products = 8 * scale
    n_channels = 4 + scale

    regions = [f"R{i:03d}" for i in range(n_regions)]
    products = [f"P{i:03d}" for i in range(n_products)]
    months = [f"M{i:02d}" for i in range(1, N_MONTHS + 1)]
    channels = [f"Ch{i:02d}" for i in range(n_channels)]

    potential = n_regions * n_products * N_MONTHS * n_channels * len(MEASURES)
    yield f"# Scale={scale}, Density={density}"
    yield (f"# {n_regions} regions × {n_products} products × {N_MONTHS} months"
           f" × {n_channels} channels × {len(MEASURES)} measures")
    yield f"# Potential cells: {potential}, Expected: ~{int(potential * density)}"
    yield ""

    categories = [
        ("Region", regions),
        ("Product", products),
        ("Month", months),
        ("Channel", channels),
        ("Measure", MEASURES),
    ]
    # All categories are declared before any item is added to them.
    for category, _ in categories:
        yield f"add-category {category}"
    for category, items in categories:
        for item in items:
            yield f"add-item {category} {item}"

    yield "set-axis Region row"
    yield "set-axis Product column"
    yield "set-axis Month page"
    yield "set-axis Channel none"
    yield "set-axis Measure none"

    total_cells = 0
    # product() walks the coordinates in the same order as nested loops over
    # regions, products, months and channels (last one varying fastest).
    for r, p, m, c in itertools.product(regions, products, months, channels):
        if rng.random() < density:
            rev = rng.randint(100, 10000)
            cost = rng.randint(50, rev)  # cost never exceeds revenue
            units = rng.randint(1, 500)
            coords = f"Region/{r} Product/{p} Month/{m} Channel/{c}"
            yield f"set-cell {rev} {coords} Measure/Revenue"
            yield f"set-cell {cost} {coords} Measure/Cost"
            yield f"set-cell {units} {coords} Measure/Units"
            total_cells += 3
    yield f"# Total cells: {total_cells}"

    yield 'add-formula Measure "Profit = Revenue - Cost"'
    yield 'add-formula Measure "Margin = Profit / Revenue"'
    yield 'add-formula Measure "AvgPrice = Revenue / Units"'

    n_exports = exports if exports > 0 else N_MONTHS
    for i in range(n_exports):
        # Wrap around the months so a request for more passes than there are
        # months is honoured instead of being silently capped.
        month = months[i % N_MONTHS]
        yield f"set-page Month {month} . export-csv /tmp/improvise_bench_{i:02d}.csv"
    yield "# Done"


def main(argv=None):
    """Parse the command line and print the generated script to stdout."""
    args = parse_args(argv)
    for line in generate_workload(args.scale, args.density, args.exports,
                                  args.seed):
        print(line)


if __name__ == "__main__":
    main()