// cell bench - Run benchmarks with statistical analysis

var shop = use('internal/shop')
var pkg = use('package')
var fd = use('fd')
var time = use('time')
var json = use('json')
var blob = use('blob')
var os = use('internal/os')
var testlib = use('internal/testlib')
var math = use('math/radians') // NOTE(review): only math.sqrt is used — confirm 'math/radians' is the intended module

var _args = args == null ? [] : args
var target_pkg = null // null = current package
var target_bench = null // null = all benchmarks, otherwise specific bench file
var all_pkgs = false
var bench_mode = "bytecode" // "bytecode", "native", or "compare"

// Strip mode flags from args before parsing.
// Mutates bench_mode as a side effect and rewrites _args without the flags.
function strip_mode_flags() {
  var filtered = []
  arrfor(_args, function(a) {
    if (a == '--native') { bench_mode = "native" }
    else if (a == '--bytecode') { bench_mode = "bytecode" }
    else if (a == '--compare') { bench_mode = "compare" }
    else { push(filtered, a) }
  })
  _args = filtered
}
strip_mode_flags()

// Benchmark configuration
def WARMUP_BATCHES = 3
def SAMPLES = 11 // Number of timing samples to collect
def TARGET_SAMPLE_NS = 20000000 // 20ms per sample (fast mode)
def MIN_SAMPLE_NS = 2000000 // 2ms minimum sample duration
def MIN_BATCH_SIZE = 1
def MAX_BATCH_SIZE = 100000000 // 100M iterations max per batch

// Statistical functions

// Median of an array; averages the two middle values for even-length input.
// Returns 0 for an empty array.
function median(arr) {
  if (length(arr) == 0) return 0
  var sorted = sort(arr)
  var mid = floor(length(arr) / 2)
  if (length(arr) % 2 == 0) {
    return (sorted[mid - 1] + sorted[mid]) / 2
  }
  return sorted[mid]
}

// Arithmetic mean; returns 0 for an empty array.
function mean(arr) {
  if (length(arr) == 0) return 0
  var sum = 0
  arrfor(arr, function(val) { sum += val })
  return sum / length(arr)
}

// Sample standard deviation (divides by n-1, Bessel's correction).
// Caller supplies the precomputed mean. Returns 0 for fewer than 2 samples.
function stddev(arr, mean_val) {
  if (length(arr) < 2) return 0
  var sum_sq_diff = 0
  arrfor(arr, function(val) {
    var diff = val - mean_val
    sum_sq_diff += diff * diff
  })
  return math.sqrt(sum_sq_diff / (length(arr) - 1))
}

// Nearest-rank style percentile: index = floor(n * p / 100), clamped to the
// last element. Returns 0 for an empty array.
function percentile(arr, p) {
  if (length(arr) == 0) return 0
  var sorted = sort(arr)
  var idx = floor(length(arr) * p / 100)
  if (idx >= length(arr)) idx = length(arr) - 1
  return sorted[idx]
}

// Parse arguments similar to test.ce
// Forms handled:
//   cell bench                      -> all benches of the current package
//   cell bench all                  -> same as above
//   cell bench package <name> [b]   -> one dependency package (optional bench)
//   cell bench package all          -> every known package
//   cell bench <path>               -> a specific bench file (benches/ prefix
//                                      is added when it resolves a file)
// Sets the module-level target_pkg / target_bench / all_pkgs globals.
// Returns false (after printing a message) when the target is invalid.
function parse_args() {
  var name = null
  var lock = null
  var resolved = null
  var bench_path = null
  if (length(_args) == 0) {
    if (!testlib.is_valid_package('.')) {
      log.console('No cell.toml found in current directory')
      return false
    }
    target_pkg = null
    return true
  }
  if (_args[0] == 'all') {
    if (!testlib.is_valid_package('.')) {
      log.console('No cell.toml found in current directory')
      return false
    }
    target_pkg = null
    return true
  }
  if (_args[0] == 'package') {
    if (length(_args) < 2) {
      log.console('Usage: cell bench package [bench]')
      log.console(' cell bench package all')
      return false
    }
    if (_args[1] == 'all') {
      all_pkgs = true
      log.console('Benchmarking all packages...')
      return true
    }
    name = _args[1]
    lock = shop.load_lock()
    // Resolution order: lockfile entry, absolute package path, then an alias
    // of the current package.
    if (lock[name]) {
      target_pkg = name
    } else if (starts_with(name, '/') && testlib.is_valid_package(name)) {
      target_pkg = name
    } else {
      if (testlib.is_valid_package('.')) {
        resolved = pkg.alias_to_package(null, name)
        if (resolved) {
          target_pkg = resolved
        } else {
          log.console(`Package not found: ${name}`)
          return false
        }
      } else {
        log.console(`Package not found: ${name}`)
        return false
      }
    }
    if (length(_args) >= 3) {
      target_bench = _args[2]
    }
    log.console(`Benchmarking package: ${target_pkg}`)
    return true
  }
  // cell bench benches/suite or cell bench
  bench_path = _args[0]
  // Normalize path - add benches/ prefix if not present
  if (!starts_with(bench_path, 'benches/') && !starts_with(bench_path, '/')) {
    if (!fd.is_file(bench_path + '.cm') && !fd.is_file(bench_path)) {
      if (fd.is_file('benches/' + bench_path + '.cm') || fd.is_file('benches/' + bench_path)) {
        bench_path = 'benches/' + bench_path
      }
    }
  }
  target_bench = bench_path
  target_pkg = null
  if (!testlib.is_valid_package('.')) {
    log.console('No cell.toml found in current directory')
    return false
  }
  return true
}

if (!parse_args()) {
  $stop()
  return
}

// Collect benchmark files from a package.
// Returns the package's "benches/*.cm" file list; when specific_bench is
// given, only the file whose extension-stripped path matches (the
// "benches/" prefix and ".cm" suffix are both optional in specific_bench).
function collect_benches(package_name, specific_bench) {
  var prefix = testlib.get_pkg_dir(package_name)
  var benches_dir = prefix + '/benches'
  if (!fd.is_dir(benches_dir)) return []
  var files = pkg.list_files(package_name)
  var bench_files = []
  arrfor(files, function(f) {
    var bench_name = null
    var match_name = null
    var match_base = null
    if (starts_with(f, "benches/") && ends_with(f, ".cm")) {
      if (specific_bench) {
        bench_name = text(f, 0, -3) // strip trailing ".cm"
        match_name = specific_bench
        if (!starts_with(match_name, 'benches/')) match_name = 'benches/' + match_name
        match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
        if (bench_name != match_base) return
      }
      push(bench_files, f)
    }
  })
  return bench_files
}

// Calibrate batch size for a benchmark.
// Doubles the batch count until one run of bench_fn(n) takes at least
// MIN_SAMPLE_NS, then scales it linearly toward TARGET_SAMPLE_NS.
// Non-batch benchmarks always use a batch size of 1. The repeated
// is_number guards keep the result a sane positive number even if the
// timing arithmetic misbehaves.
function calibrate_batch_size(bench_fn, is_batch) {
  if (!is_batch) return 1
  var n = MIN_BATCH_SIZE
  var dt = 0
  var start = 0
  var new_n = 0
  var calc = 0
  var target_n = 0
  // Find a batch size that takes at least MIN_SAMPLE_NS
  while (n < MAX_BATCH_SIZE) {
    if (!is_number(n) || n < 1) {
      n = 1
      break
    }
    start = os.now()
    bench_fn(n)
    dt = os.now() - start
    if (dt >= MIN_SAMPLE_NS) break
    new_n = n * 2
    if (!is_number(new_n) || new_n > MAX_BATCH_SIZE) {
      n = MAX_BATCH_SIZE
      break
    }
    n = new_n
  }
  // Adjust to target sample duration
  if (dt > 0 && dt < TARGET_SAMPLE_NS && is_number(n) && is_number(dt)) {
    calc = n * TARGET_SAMPLE_NS / dt
    if (is_number(calc) && calc > 0) {
      target_n = floor(calc)
      if (is_number(target_n) && target_n > 0) {
        if (target_n > MAX_BATCH_SIZE) target_n = MAX_BATCH_SIZE
        if (target_n < MIN_BATCH_SIZE) target_n = MIN_BATCH_SIZE
        n = target_n
      }
    }
  }
  if (!is_number(n) || n < 1) { n = 1 }
  return n
}

// Run a single benchmark function.
// bench_fn is either a plain function, or a structured object with
// {setup, run, teardown}. A probe call with one argument decides whether the
// function accepts a batch count ("batch mode"); if the probe disrupts, the
// function is assumed to take no batch argument. Collects SAMPLES timed runs
// after WARMUP_BATCHES warmup runs and returns a stats record
// (median/mean/min/max/stddev/p95/p99 in ns per op, plus ops_per_sec).
function run_single_bench(bench_fn, bench_name) {
  var timings_per_op = []
  var is_structured = is_object(bench_fn) && bench_fn.run
  var is_batch = false
  var batch_size = 1
  var setup_fn = null
  var run_fn = null
  var teardown_fn = null
  var calibrate_fn = null
  var _detect = null
  var i = 0
  var state = null
  var start = 0
  var duration = 0
  var ns_per_op = 0
  if (is_structured) {
    setup_fn = bench_fn.setup || function() { return null }
    run_fn = bench_fn.run
    teardown_fn = bench_fn.teardown || function(s) {}
    // Check if run function accepts batch size; a disruption during the
    // probe means "no batch argument" rather than a hard failure.
    _detect = function() {
      var test_state = setup_fn()
      run_fn(1, test_state)
      is_batch = true
      if (teardown_fn) teardown_fn(test_state)
    } disruption {
      is_batch = false
    }
    _detect()
    // Calibration wrapper runs a full setup/run/teardown cycle per call so
    // timing reflects the same shape as the measurement phase.
    calibrate_fn = function(n) {
      var s = setup_fn()
      run_fn(n, s)
      if (teardown_fn) teardown_fn(s)
    }
    batch_size = calibrate_batch_size(calibrate_fn, is_batch)
    if (!is_number(batch_size) || batch_size < 1) { batch_size = 1 }
  } else {
    // Simple function format
    _detect = function() {
      bench_fn(1)
      is_batch = true
    } disruption {
      is_batch = false
    }
    _detect()
    batch_size = calibrate_batch_size(bench_fn, is_batch)
  }
  if (!batch_size || batch_size < 1) { batch_size = 1 }
  // Warmup phase
  for (i = 0; i < WARMUP_BATCHES; i++) {
    if (!is_number(batch_size) || batch_size < 1) { batch_size = 1 }
    if (is_structured) {
      state = setup_fn()
      if (is_batch) { run_fn(batch_size, state) } else { run_fn(state) }
      if (teardown_fn) teardown_fn(state)
    } else {
      if (is_batch) { bench_fn(batch_size) } else { bench_fn() }
    }
  }
  // Measurement phase - collect SAMPLES timing samples.
  // setup/teardown time is excluded from the timed window.
  for (i = 0; i < SAMPLES; i++) {
    if (!is_number(batch_size) || batch_size < 1) { batch_size = 1 }
    if (is_structured) {
      state = setup_fn()
      start = os.now()
      if (is_batch) { run_fn(batch_size, state) } else { run_fn(state) }
      duration = os.now() - start
      if (teardown_fn) teardown_fn(state)
      ns_per_op = is_batch ? duration / batch_size : duration
      push(timings_per_op, ns_per_op)
    } else {
      start = os.now()
      if (is_batch) { bench_fn(batch_size) } else { bench_fn() }
      duration = os.now() - start
      ns_per_op = is_batch ? duration / batch_size : duration
      push(timings_per_op, ns_per_op)
    }
  }
  // Calculate statistics
  var mean_ns = mean(timings_per_op)
  var median_ns = median(timings_per_op)
  var min_ns = reduce(timings_per_op, min)
  var max_ns = reduce(timings_per_op, max)
  var stddev_ns = stddev(timings_per_op, mean_ns)
  var p95_ns = percentile(timings_per_op, 95)
  var p99_ns = percentile(timings_per_op, 99)
  var ops_per_sec = 0
  // ops/sec is derived from the median (robust against outlier samples)
  if (median_ns > 0) { ops_per_sec = floor(1000000000 / median_ns) }
  return {
    name: bench_name,
    batch_size: batch_size,
    samples: SAMPLES,
    mean_ns: round(mean_ns),
    median_ns: round(median_ns),
    min_ns: round(min_ns),
    max_ns: round(max_ns),
    stddev_ns: round(stddev_ns),
    p95_ns: round(p95_ns),
    p99_ns: round(p99_ns),
    ops_per_sec: ops_per_sec
  }
}

// Format nanoseconds for display (ns/µs/ms/s, two decimal places)
function format_ns(ns) {
  if (ns < 1000) return `${ns}ns`
  if (ns < 1000000) return `${round(ns / 1000 * 100) / 100}µs`
  if (ns < 1000000000) return `${round(ns / 1000000 * 100) / 100}ms`
  return `${round(ns / 1000000000 * 100) / 100}s`
}

// Format ops/sec for display (plain/K/M/G, two decimal places)
function format_ops(ops) {
  if (ops < 1000) return `${ops} ops/s`
  if (ops < 1000000) return `${round(ops / 1000 * 100) / 100}K ops/s`
  if (ops < 1000000000) return `${round(ops / 1000000 * 100) / 100}M ops/s`
  return `${round(ops / 1000000000 * 100) / 100}G ops/s`
}

// Load a module for benchmarking in the given mode
// Returns the module value, or null on failure
// Shared path resolution: module path (".cm" stripped), owning package
// (realpath of '.' for the local package), and on-disk source path.
function resolve_bench_load(f, package_name) {
  var mod_path = text(f, 0, -3)
  var use_pkg = package_name ? package_name : fd.realpath('.')
  var prefix = testlib.get_pkg_dir(package_name)
  var src_path = prefix + '/' + f
  return {mod_path, use_pkg, src_path}
}

// Load a bench file through the native compiler.
function load_bench_module_native(f, package_name) {
  var r = resolve_bench_load(f, package_name)
  return shop.use_native(r.src_path, r.use_pkg)
}

// Load a bench file as bytecode (default) or native, per mode.
function load_bench_module(f, package_name, mode) {
  var r = resolve_bench_load(f, package_name)
  if (mode == "native") { return load_bench_module_native(f, package_name) }
  return shop.use(r.mod_path, r.use_pkg)
}

// Collect benchmark functions from a loaded module.
// A module that is itself a function becomes a single bench named 'main';
// an object module contributes one bench per function-valued key.
function collect_bench_fns(bench_mod) {
  var benches = []
  if (is_function(bench_mod)) {
    push(benches, {name: 'main', fn: bench_mod})
  } else if (is_object(bench_mod)) {
    arrfor(array(bench_mod), function(k) {
      if (is_function(bench_mod[k])) push(benches, {name: k, fn: bench_mod[k]})
    })
  }
  return benches
}

// Print results for a single benchmark; label (e.g. "bytecode") tags each
// line in compare mode, null means no tag.
function print_bench_result(result, label) {
  var prefix = label ? `[${label}] ` : ''
  log.console(` ${prefix}${format_ns(result.median_ns)}/op ${format_ops(result.ops_per_sec)}`)
  log.console(` ${prefix}min: ${format_ns(result.min_ns)} max: ${format_ns(result.max_ns)} stddev: ${format_ns(result.stddev_ns)}`)
  if (result.batch_size > 1) {
    log.console(` ${prefix}batch: ${result.batch_size} samples: ${result.samples}`)
  }
}

// Run benchmarks for a package (null package_name = the local package).
// Returns {package, files: [{name, benchmarks: [...]}, ...], total}.
// Disrupted module loads and disrupted individual benches are recorded as
// error entries instead of aborting the whole run. In compare mode each
// bench is run twice (bytecode then native) and the speedup is printed.
function run_benchmarks(package_name, specific_bench) {
  var bench_files = collect_benches(package_name, specific_bench)
  var pkg_result = { package: package_name || "local", files: [], total: 0 }
  if (length(bench_files) == 0) return pkg_result
  var mode_label = bench_mode == "compare" ? "bytecode vs native" : bench_mode
  if (package_name) log.console(`Running benchmarks for ${package_name} (${mode_label})`)
  else log.console(`Running benchmarks for local package (${mode_label})`)
  arrfor(bench_files, function(f) {
    var load_error = false
    var benches = []
    var native_benches = []
    var bench_mod = null
    var native_mod = null
    var error_result = null
    var file_result = { name: f, benchmarks: [] }
    var _load_file = function() {
      var _load_native = null
      if (bench_mode == "compare") {
        bench_mod = load_bench_module(f, package_name, "bytecode")
        benches = collect_bench_fns(bench_mod)
        // Native load failure downgrades compare mode to bytecode-only for
        // this file rather than failing it.
        _load_native = function() {
          native_mod = load_bench_module(f, package_name, "native")
          native_benches = collect_bench_fns(native_mod)
        } disruption {
          log.console(` ${f}: native compilation failed, comparing skipped`)
          native_benches = []
        }
        _load_native()
      } else {
        bench_mod = load_bench_module(f, package_name, bench_mode)
        benches = collect_bench_fns(bench_mod)
      }
      if (length(benches) > 0) {
        log.console(` ${f}`)
        arrfor(benches, function(b) {
          var bench_error = false
          var result = null
          var nat_b = null
          var nat_error = false
          var nat_result = null
          var _run_bench = function() {
            var speedup = 0
            var _run_nat = null
            result = run_single_bench(b.fn, b.name)
            result.package = pkg_result.package
            result.mode = bench_mode == "compare" ? "bytecode" : bench_mode
            push(file_result.benchmarks, result)
            pkg_result.total++
            log.console(` ${result.name}`)
            if (bench_mode == "compare") {
              print_bench_result(result, "bytecode")
              // Find matching native bench and run it.
              // NOTE(review): nat_b is used as an index into native_benches
              // below, so find() is assumed to return an index (null when
              // absent) — confirm against the stdlib's find semantics.
              nat_b = find(native_benches, function(nb) { return nb.name == b.name })
              if (nat_b != null) {
                _run_nat = function() {
                  nat_result = run_single_bench(native_benches[nat_b].fn, b.name)
                  nat_result.package = pkg_result.package
                  nat_result.mode = "native"
                  push(file_result.benchmarks, nat_result)
                  pkg_result.total++
                  print_bench_result(nat_result, "native ")
                  if (nat_result.median_ns > 0) {
                    // speedup > 1 means native is faster
                    speedup = result.median_ns / nat_result.median_ns
                    log.console(` speedup: ${round(speedup * 100) / 100}x`)
                  }
                } disruption {
                  nat_error = true
                }
                _run_nat()
                if (nat_error) {
                  log.console(` [native ] ERROR`)
                }
              } else {
                log.console(` [native ] (no matching function)`)
              }
            } else {
              print_bench_result(result, null)
            }
          } disruption {
            bench_error = true
          }
          _run_bench()
          if (bench_error) {
            log.console(` ERROR ${b.name}`)
            error_result = { package: pkg_result.package, name: b.name, error: "benchmark disrupted" }
            push(file_result.benchmarks, error_result)
            pkg_result.total++
          }
        })
      }
    } disruption {
      load_error = true
    }
    _load_file()
    if (load_error) {
      log.console(` Error loading ${f}`)
      error_result = { package: pkg_result.package, name: "load_module", error: "error loading module" }
      push(file_result.benchmarks, error_result)
      pkg_result.total++
    }
    if (length(file_result.benchmarks) > 0) {
      push(pkg_result.files, file_result)
    }
  })
  return pkg_result
}

// Run all benchmarks
var all_results = []
var packages = null
if (all_pkgs) {
  // "package all": local package first (if valid), then every lock package.
  if (testlib.is_valid_package('.')) {
    push(all_results, run_benchmarks(null, null))
  }
  packages = shop.list_packages()
  arrfor(packages, function(p) {
    push(all_results, run_benchmarks(p, null))
  })
} else {
  push(all_results, run_benchmarks(target_pkg, target_bench))
}

// Calculate totals
var total_benches = 0
arrfor(all_results, function(result) { total_benches += result.total })
log.console(`----------------------------------------`)
log.console(`Benchmarks: ${total_benches} total`)

// Generate reports.
// Writes a human-readable bench.txt (summary + detailed per-bench stats)
// plus one JSON file per package (flat array of result records) into a
// timestamped directory under the shop reports dir. Reads the module-level
// all_results / total_benches / bench_mode globals.
function generate_reports() {
  var timestamp = text(floor(time.number()))
  var report_dir = shop.get_reports_dir() + '/bench_' + timestamp
  testlib.ensure_dir(report_dir)
  var mode_str = bench_mode == "compare" ? "bytecode vs native" : bench_mode
  var txt_report = `BENCHMARK REPORT Date: ${time.text(time.number())} Mode: ${mode_str} Total benchmarks: ${total_benches} === SUMMARY === `
  // Summary section: one line per benchmark, grouped by package and file.
  arrfor(all_results, function(pkg_res) {
    if (pkg_res.total == 0) return
    txt_report += `Package: ${pkg_res.package}\n`
    arrfor(pkg_res.files, function(f) {
      txt_report += ` ${f.name}\n`
      arrfor(f.benchmarks, function(b) {
        var mode_tag = b.mode ? ` [${b.mode}]` : ''
        if (b.error) {
          txt_report += ` ERROR ${b.name}: ${b.error}\n`
        } else {
          txt_report += ` ${b.name}${mode_tag}: ${format_ns(b.median_ns)}/op (${format_ops(b.ops_per_sec)})\n`
        }
      })
    })
  })
  txt_report += `\n=== DETAILED RESULTS ===\n`
  // Detailed section: full stats per bench; error entries are skipped here
  // (they already appear in the summary).
  arrfor(all_results, function(pkg_res) {
    if (pkg_res.total == 0) return
    arrfor(pkg_res.files, function(f) {
      arrfor(f.benchmarks, function(b) {
        if (b.error) return
        var detail_mode = b.mode ? ` [${b.mode}]` : ''
        txt_report += `\n${pkg_res.package}::${b.name}${detail_mode}\n`
        txt_report += ` batch_size: ${b.batch_size} samples: ${b.samples}\n`
        txt_report += ` median: ${format_ns(b.median_ns)}/op\n`
        txt_report += ` mean: ${format_ns(b.mean_ns)}/op\n`
        txt_report += ` min: ${format_ns(b.min_ns)}\n`
        txt_report += ` max: ${format_ns(b.max_ns)}\n`
        txt_report += ` stddev: ${format_ns(b.stddev_ns)}\n`
        txt_report += ` p95: ${format_ns(b.p95_ns)}\n`
        txt_report += ` p99: ${format_ns(b.p99_ns)}\n`
        txt_report += ` ops/s: ${format_ops(b.ops_per_sec)}\n`
      })
    })
  })
  testlib.ensure_dir(report_dir) // NOTE(review): already ensured above — harmless but redundant
  fd.slurpwrite(`${report_dir}/bench.txt`, stone(blob(txt_report)))
  log.console(`Report written to ${report_dir}/bench.txt`)
  // Generate JSON per package
  arrfor(all_results, function(pkg_res) {
    if (pkg_res.total == 0) return
    var pkg_benches = []
    arrfor(pkg_res.files, function(f) {
      arrfor(f.benchmarks, function(benchmark) {
        push(pkg_benches, benchmark)
      })
    })
    // NOTE(review): if replace() only substitutes the first match of /\//,
    // package paths with several slashes keep some — confirm replace-all
    // semantics, or the JSON may land in a nested (missing) directory.
    var json_path = `${report_dir}/${replace(pkg_res.package, /\//, '_')}.json`
    fd.slurpwrite(json_path, stone(blob(json.encode(pkg_benches))))
  })
}
generate_reports()
$stop()