#!/usr/bin/env python3
"""Generate a profiling workload script for improvise.

Writes a Forth-style command script to stdout, one command per line.  The
script builds a multi-dimensional model (Region, Product, Month, Channel,
Measure), fills a random subset of its cells, adds three formulas and then
exercises the grid aggregation hot path via repeated ``export-csv`` calls.

Usage:
    python3 bench/gen_workload.py [--scale N] > bench/large_workload.txt
    cargo build --release
    time ./target/release/improvise script bench/large_workload.txt

For flamegraph profiling:
    samply record ./target/release/improvise script bench/large_workload.txt

The commit that introduced this script used a dedicated cargo profile instead:
    python3 bench/gen_workload.py --scale 5 > /tmp/workload.txt
    cargo build --profile profiling
    samply record ./target/profiling/improvise script /tmp/workload.txt
"""
import argparse
import itertools
import random
from typing import Iterator, Optional, Sequence

# Fixed seed: the same (scale, density) always produces the same script, so
# profiling runs are comparable with each other.
SEED = 42
N_MONTHS = 12
MEASURES = ("Revenue", "Cost", "Units")


def generate_workload(scale: int = 1, density: float = 0.3,
                      exports: int = 0) -> Iterator[str]:
    """Yield the workload script one line at a time.

    Args:
        scale: Size multiplier.  Produces ``5 * scale`` regions,
            ``8 * scale`` products and ``4 + scale`` channels; the number of
            months is always 12.
        density: Probability that a given Region/Product/Month/Channel
            combination gets its three measure cells written.
        exports: Number of ``export-csv`` passes.  ``0`` (or a negative
            value) means one pass per month.  A value larger than the number
            of months wraps around the month list rather than being cut off
            at 12 passes.

    Yields:
        Script lines without a trailing newline, in output order.
    """
    # A private generator gives the same sequence as ``random.seed(SEED)``
    # on the module-level functions, without touching global RNG state.
    rng = random.Random(SEED)

    n_regions = 5 * scale
    n_products = 8 * scale
    n_channels = 4 + scale

    regions = [f"R{i:03d}" for i in range(n_regions)]
    products = [f"P{i:03d}" for i in range(n_products)]
    months = [f"M{i:02d}" for i in range(1, N_MONTHS + 1)]
    channels = [f"Ch{i:02d}" for i in range(n_channels)]

    dimensions = [
        ("Region", regions),
        ("Product", products),
        ("Month", months),
        ("Channel", channels),
        ("Measure", list(MEASURES)),
    ]

    # Header comments describing the generated model.
    potential = (n_regions * n_products * N_MONTHS * n_channels
                 * len(MEASURES))
    yield f"# Scale={scale}, Density={density}"
    yield (f"# {n_regions} regions × {n_products} products × {N_MONTHS} months"
           f" × {n_channels} channels × {len(MEASURES)} measures")
    yield (f"# Potential cells: {potential},"
           f" Expected: ~{int(potential * density)}")
    yield ""

    # All categories are declared before any item is added.
    for category, _ in dimensions:
        yield f"add-category {category}"
    for category, items in dimensions:
        for item in items:
            yield f"add-item {category} {item}"

    yield "set-axis Region row"
    yield "set-axis Product column"
    yield "set-axis Month page"
    yield "set-axis Channel none"
    yield "set-axis Measure none"

    # ``itertools.product`` iterates with the rightmost input varying
    # fastest, i.e. in the same order as four nested ``for`` loops.
    n_cells = 0
    for r, p, m, c in itertools.product(regions, products, months, channels):
        if rng.random() < density:
            rev = rng.randint(100, 10000)
            cost = rng.randint(50, rev)  # cost never exceeds revenue
            units = rng.randint(1, 500)
            coords = f"Region/{r} Product/{p} Month/{m} Channel/{c}"
            yield f"set-cell {rev} {coords} Measure/Revenue"
            yield f"set-cell {cost} {coords} Measure/Cost"
            yield f"set-cell {units} {coords} Measure/Units"
            n_cells += 3

    yield f"# Total cells: {n_cells}"

    yield 'add-formula Measure "Profit = Revenue - Cost"'
    yield 'add-formula Measure "Margin = Profit / Revenue"'
    yield 'add-formula Measure "AvgPrice = Revenue / Units"'

    # Wrap around the month list so that asking for more passes than there
    # are months really produces that many exports.  The output file name
    # uses the running index, so every pass writes its own file.
    n_exports = exports if exports > 0 else len(months)
    for i in range(n_exports):
        month = months[i % len(months)]
        yield (f"set-page Month {month} ."
               f" export-csv /tmp/improvise_bench_{i:02d}.csv")

    yield "# Done"


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse command-line options and print the workload script to stdout.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--scale", type=int, default=1,
                        help="Scale factor (1=small, 5=medium, 10=large)")
    parser.add_argument("--density", type=float, default=0.3,
                        help="Cell density (0.0-1.0)")
    parser.add_argument("--exports", type=int, default=0,
                        help="Number of export passes (0 = one per month)")
    args = parser.parse_args(argv)

    for line in generate_workload(args.scale, args.density, args.exports):
        print(line)


if __name__ == "__main__":
    main()