Generates Forth-style command scripts that build a multi-dimensional model and exercise the grid aggregation hot path via repeated export-csv calls. Used for profiling with samply.

Usage:
  python3 bench/gen_workload.py --scale 5 > /tmp/workload.txt
  cargo build --profile profiling
  samply record ./target/profiling/improvise script /tmp/workload.txt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
84 lines
3.0 KiB
Python
84 lines
3.0 KiB
Python
#!/usr/bin/env python3
"""Generate a profiling workload script for improvise.

Usage:
    python3 bench/gen_workload.py [--scale N] > bench/large_workload.txt
    cargo build --release
    time ./target/release/improvise script bench/large_workload.txt

For flamegraph profiling:
    samply record ./target/release/improvise script bench/large_workload.txt
"""
import argparse
import itertools
import random
from typing import Iterator, List, Optional

# Fixed parts of the generated model; only regions, products and channels
# grow with --scale.
MEASURES = ["Revenue", "Cost", "Units"]
N_MONTHS = 12


def build_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for the workload generator."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--scale", type=int, default=1,
                        help="Scale factor (1=small, 5=medium, 10=large)")
    parser.add_argument("--density", type=float, default=0.3,
                        help="Cell density (0.0-1.0)")
    parser.add_argument("--exports", type=int, default=0,
                        help="Number of export passes (0 = one per month)")
    return parser


def generate_lines(scale: int, density: float, exports: int = 0,
                   seed: int = 42) -> Iterator[str]:
    """Yield the workload script one line at a time (without newlines).

    Args:
        scale: Multiplier for the number of regions (5 * scale) and products
            (8 * scale); the channel count is 4 + scale.
        density: Probability that a given Region/Product/Month/Channel
            coordinate is populated. A populated coordinate always gets all
            three measures.
        exports: Number of ``set-page ... export-csv`` passes. Values <= 0
            mean one pass per month. Values larger than the number of months
            cycle through the months again instead of being silently capped.
        seed: Seed for the private random generator; the same arguments
            always produce the same script.

    Yields:
        Script lines: header comments, category/item/axis setup, ``set-cell``
        commands, formulas, export passes and a final ``# Done`` marker.
    """
    n_regions = 5 * scale
    n_products = 8 * scale
    n_channels = 4 + scale

    regions = [f"R{i:03d}" for i in range(n_regions)]
    products = [f"P{i:03d}" for i in range(n_products)]
    months = [f"M{i:02d}" for i in range(1, N_MONTHS + 1)]
    channels = [f"Ch{i:02d}" for i in range(n_channels)]

    potential = (n_regions * n_products * N_MONTHS * n_channels
                 * len(MEASURES))
    yield f"# Scale={scale}, Density={density}"
    yield (f"# {n_regions} regions × {n_products} products × {N_MONTHS} months"
           f" × {n_channels} channels × {len(MEASURES)} measures")
    yield (f"# Potential cells: {potential},"
           f" Expected: ~{int(potential * density)}")
    yield ""

    categories = [
        ("Region", regions),
        ("Product", products),
        ("Month", months),
        ("Channel", channels),
        ("Measure", MEASURES),
    ]
    # All categories are declared before any item is added.
    for cat, _ in categories:
        yield f"add-category {cat}"
    for cat, items in categories:
        for item in items:
            yield f"add-item {cat} {item}"

    yield "set-axis Region row"
    yield "set-axis Product column"
    yield "set-axis Month page"
    yield "set-axis Channel none"
    yield "set-axis Measure none"

    # A private generator seeded like the former random.seed(42) call yields
    # the same number sequence without touching the global random state.
    rng = random.Random(seed)
    n_cells = 0
    # product() iterates in the same order as the equivalent nested loops.
    for r, p, m, c in itertools.product(regions, products, months, channels):
        if rng.random() < density:
            # Draw order (revenue, cost, units) determines the output; keep it.
            rev = rng.randint(100, 10000)
            cost = rng.randint(50, rev)
            units = rng.randint(1, 500)
            coords = f"Region/{r} Product/{p} Month/{m} Channel/{c}"
            yield f"set-cell {rev} {coords} Measure/Revenue"
            yield f"set-cell {cost} {coords} Measure/Cost"
            yield f"set-cell {units} {coords} Measure/Units"
            n_cells += 3

    yield f"# Total cells: {n_cells}"

    yield 'add-formula Measure "Profit = Revenue - Cost"'
    yield 'add-formula Measure "Margin = Profit / Revenue"'
    yield 'add-formula Measure "AvgPrice = Revenue / Units"'

    n_exports = exports if exports > 0 else len(months)
    for i in range(n_exports):
        # Wrap around so that --exports above the month count is honoured.
        month = months[i % len(months)]
        yield (f"set-page Month {month} ."
               f" export-csv /tmp/improvise_bench_{i:02d}.csv")

    yield "# Done"


def main(argv: Optional[List[str]] = None) -> int:
    """Parse arguments, validate them and print the workload to stdout.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 on success. Invalid arguments exit via ``parser.error``
        (``SystemExit`` with status 2).
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.scale < 1:
        parser.error("--scale must be >= 1")
    # Written as a negated range check so that NaN is rejected as well.
    if not 0.0 <= args.density <= 1.0:
        parser.error("--density must be between 0.0 and 1.0")

    for line in generate_lines(args.scale, args.density, args.exports):
        print(line)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())