#!/usr/bin/env python3
"""Generate a profiling workload script for improvise.

The script is written to stdout as one command per line.

Usage:
    python3 bench/gen_workload.py [--scale N] > bench/large_workload.txt
    cargo build --release
    time ./target/release/improvise script bench/large_workload.txt

For flamegraph profiling:
    samply record ./target/release/improvise script bench/large_workload.txt
"""

import argparse
import itertools
import random
from typing import Iterator, Optional, Sequence

# Fixed seed so that repeated runs with the same arguments emit the same script.
SEED = 42
N_MONTHS = 12
MEASURES = ("Revenue", "Cost", "Units")


def workload_lines(
    scale: int = 1,
    density: float = 0.3,
    exports: int = 0,
    seed: int = SEED,
) -> Iterator[str]:
    """Yield the workload script line by line, without trailing newlines.

    Args:
        scale: Multiplier for the dimension sizes: ``5 * scale`` regions,
            ``8 * scale`` products and ``4 + scale`` channels. The month
            count is always ``N_MONTHS``.
        density: Probability that a given Region/Product/Month/Channel
            coordinate gets data. A populated coordinate always receives
            all three measures.
        exports: Number of export passes. Values ``<= 0`` mean one pass per
            month. Passes are taken from the month list in order, so the
            count is capped at ``N_MONTHS``.
        seed: Seed for the private random number generator.

    Yields:
        Comment lines (prefixed with ``#``), one blank line, and the
        commands in the order they are to be executed.
    """
    # A private generator keeps the process-wide ``random`` state untouched
    # while producing the same sequence as ``random.seed(seed)`` would.
    rng = random.Random(seed)

    n_regions = 5 * scale
    n_products = 8 * scale
    n_channels = 4 + scale

    regions = [f"R{i:03d}" for i in range(n_regions)]
    products = [f"P{i:03d}" for i in range(n_products)]
    months = [f"M{i:02d}" for i in range(1, N_MONTHS + 1)]
    channels = [f"Ch{i:02d}" for i in range(n_channels)]

    potential = n_regions * n_products * N_MONTHS * n_channels * len(MEASURES)
    yield f"# Scale={scale}, Density={density}"
    yield (
        f"# {n_regions} regions × {n_products} products × {N_MONTHS} months"
        f" × {n_channels} channels × {len(MEASURES)} measures"
    )
    yield f"# Potential cells: {potential}, Expected: ~{int(potential * density)}"
    yield ""

    # All categories are declared before any item is added.
    categories = [
        ("Region", regions),
        ("Product", products),
        ("Month", months),
        ("Channel", channels),
        ("Measure", MEASURES),
    ]
    for name, _ in categories:
        yield f"add-category {name}"
    for name, items in categories:
        for item in items:
            yield f"add-item {name} {item}"

    yield "set-axis Region row"
    yield "set-axis Product column"
    yield "set-axis Month page"
    yield "set-axis Channel none"
    yield "set-axis Measure none"

    total = 0
    # product() walks the coordinates in the same order as four nested loops
    # (regions outermost, channels innermost).
    for r, p, m, c in itertools.product(regions, products, months, channels):
        if rng.random() < density:
            # Draw order (revenue, cost, units) determines the emitted values.
            rev = rng.randint(100, 10000)
            cost = rng.randint(50, rev)  # cost never exceeds revenue
            units = rng.randint(1, 500)
            coords = f"Region/{r} Product/{p} Month/{m} Channel/{c}"
            yield f"set-cell {rev} {coords} Measure/Revenue"
            yield f"set-cell {cost} {coords} Measure/Cost"
            yield f"set-cell {units} {coords} Measure/Units"
            total += 3

    yield f"# Total cells: {total}"

    yield 'add-formula Measure "Profit = Revenue - Cost"'
    yield 'add-formula Measure "Margin = Profit / Revenue"'
    yield 'add-formula Measure "AvgPrice = Revenue / Units"'

    n_exports = exports if exports > 0 else N_MONTHS
    # Slicing caps the number of passes at the number of months.
    for i, m in enumerate(months[:n_exports]):
        yield f"set-page Month {m} . export-csv /tmp/improvise_bench_{i:02d}.csv"

    yield "# Done"


def _parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--scale", type=int, default=1,
                        help="Scale factor (1=small, 5=medium, 10=large)")
    parser.add_argument("--density", type=float, default=0.3,
                        help="Cell density (0.0-1.0)")
    parser.add_argument("--exports", type=int, default=0,
                        help="Number of export passes (0 = one per month)")
    return parser.parse_args(argv)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Print the workload selected by the command-line options to stdout."""
    args = _parse_args(argv)
    for line in workload_lines(args.scale, args.density, args.exports):
        print(line)


if __name__ == "__main__":
    main()