// cell/bench.ce: benchmark runner for cell packages
var shop = use('internal/shop')
var pkg = use('package')
var fd = use('fd')
var time = use('time')
var json = use('json')
var blob = use('blob')
var os = use('os')
var testlib = use('internal/testlib')
var math = use('math/radians')
var _args = args == null ? [] : args
var target_pkg = null // null = current package
var target_bench = null // null = all benchmarks, otherwise specific bench file
var all_pkgs = false
var bench_mode = "bytecode" // "bytecode", "native", or "compare"
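// Usage (as implemented by parse_args below):
//   cell bench                         benchmark the current package
//   cell bench all                     same as above
//   cell bench <path>                  run one bench file (benches/ prefix added if needed)
//   cell bench package <name> [bench]  benchmark a named package
//   cell bench package all             benchmark every installed package
// Mode flags (stripped before parsing): --bytecode (default), --native, --compare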
// Strip mode flags from args before parsing
function strip_mode_flags() {
var filtered = []
arrfor(_args, function(a) {
if (a == '--native') {
bench_mode = "native"
} else if (a == '--bytecode') {
bench_mode = "bytecode"
} else if (a == '--compare') {
bench_mode = "compare"
} else {
push(filtered, a)
}
})
_args = filtered
}
strip_mode_flags()
// Benchmark configuration
def WARMUP_BATCHES = 3
def SAMPLES = 11 // Number of timing samples to collect
def TARGET_SAMPLE_NS = 20000000 // 20ms per sample (fast mode)
def MIN_SAMPLE_NS = 2000000 // 2ms minimum sample duration
def MIN_BATCH_SIZE = 1
def MAX_BATCH_SIZE = 100000000 // 100M iterations max per batch
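// Strategy: each benchmark is warmed up for WARMUP_BATCHES runs, then timed
// SAMPLES times. Batched benchmarks are calibrated so a single sample runs
// for roughly TARGET_SAMPLE_NS; per-op figures divide by the batch size.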
// Statistical functions
function median(arr) {
if (length(arr) == 0) return 0
var sorted = sort(arr)
var mid = floor(length(arr) / 2)
if (length(arr) % 2 == 0) {
return (sorted[mid - 1] + sorted[mid]) / 2
}
return sorted[mid]
}
function mean(arr) {
if (length(arr) == 0) return 0
var sum = 0
arrfor(arr, function(val) {
sum += val
})
return sum / length(arr)
}
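// Sample standard deviation (n - 1 denominator, Bessel's correction)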
function stddev(arr, mean_val) {
if (length(arr) < 2) return 0
var sum_sq_diff = 0
arrfor(arr, function(val) {
var diff = val - mean_val
sum_sq_diff += diff * diff
})
return math.sqrt(sum_sq_diff / (length(arr) - 1))
}
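// Nearest-rank percentile over the sorted samples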
function percentile(arr, p) {
if (length(arr) == 0) return 0
var sorted = sort(arr)
var idx = floor(length(arr) * p / 100)
if (idx >= length(arr)) idx = length(arr) - 1
return sorted[idx]
}
// Parse arguments similar to test.ce
function parse_args() {
var name = null
var lock = null
var resolved = null
var bench_path = null
if (length(_args) == 0) {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
}
target_pkg = null
return true
}
if (_args[0] == 'all') {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
}
target_pkg = null
return true
}
if (_args[0] == 'package') {
if (length(_args) < 2) {
log.console('Usage: cell bench package <name> [bench]')
log.console('       cell bench package all')
return false
}
if (_args[1] == 'all') {
all_pkgs = true
log.console('Benchmarking all packages...')
return true
}
name = _args[1]
lock = shop.load_lock()
if (lock[name]) {
target_pkg = name
} else if (starts_with(name, '/') && testlib.is_valid_package(name)) {
target_pkg = name
} else {
if (testlib.is_valid_package('.')) {
resolved = pkg.alias_to_package(null, name)
if (resolved) {
target_pkg = resolved
} else {
log.console(`Package not found: ${name}`)
return false
}
} else {
log.console(`Package not found: ${name}`)
return false
}
}
if (length(_args) >= 3) {
target_bench = _args[2]
}
log.console(`Benchmarking package: ${target_pkg}`)
return true
}
// cell bench benches/suite or cell bench <path>
bench_path = _args[0]
// Normalize path: if the bare path does not resolve but benches/<path> does, add the benches/ prefix
if (!starts_with(bench_path, 'benches/') && !starts_with(bench_path, '/')) {
if (!fd.is_file(bench_path + '.cm') && !fd.is_file(bench_path)) {
if (fd.is_file('benches/' + bench_path + '.cm') || fd.is_file('benches/' + bench_path)) {
bench_path = 'benches/' + bench_path
}
}
}
target_bench = bench_path
target_pkg = null
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
}
return true
}
if (!parse_args()) {
$stop()
return
}
// Collect benchmark files from a package
function collect_benches(package_name, specific_bench) {
var prefix = testlib.get_pkg_dir(package_name)
var benches_dir = prefix + '/benches'
if (!fd.is_dir(benches_dir)) return []
var files = pkg.list_files(package_name)
var bench_files = []
arrfor(files, function(f) {
var bench_name = null
var match_name = null
var match_base = null
if (starts_with(f, "benches/") && ends_with(f, ".cm")) {
if (specific_bench) {
bench_name = text(f, 0, -3)
match_name = specific_bench
if (!starts_with(match_name, 'benches/')) match_name = 'benches/' + match_name
match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
if (bench_name != match_base) return
}
push(bench_files, f)
}
})
return bench_files
}
// Calibrate batch size for a benchmark
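// Doubles the batch size until one batch runs for at least MIN_SAMPLE_NS,
// then scales it linearly toward TARGET_SAMPLE_NS, clamped to
// [MIN_BATCH_SIZE, MAX_BATCH_SIZE]. Non-batch benchmarks always use 1.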
function calibrate_batch_size(bench_fn, is_batch) {
if (!is_batch) return 1
var n = MIN_BATCH_SIZE
var dt = 0
var start = 0
var new_n = 0
var calc = 0
var target_n = 0
// Find a batch size that takes at least MIN_SAMPLE_NS
while (n < MAX_BATCH_SIZE) {
if (!is_number(n) || n < 1) {
n = 1
break
}
start = os.now()
bench_fn(n)
dt = os.now() - start
if (dt >= MIN_SAMPLE_NS) break
new_n = n * 2
if (!is_number(new_n) || new_n > MAX_BATCH_SIZE) {
n = MAX_BATCH_SIZE
break
}
n = new_n
}
// Adjust to target sample duration
if (dt > 0 && dt < TARGET_SAMPLE_NS && is_number(n) && is_number(dt)) {
calc = n * TARGET_SAMPLE_NS / dt
if (is_number(calc) && calc > 0) {
target_n = floor(calc)
if (is_number(target_n) && target_n > 0) {
if (target_n > MAX_BATCH_SIZE) target_n = MAX_BATCH_SIZE
if (target_n < MIN_BATCH_SIZE) target_n = MIN_BATCH_SIZE
n = target_n
}
}
}
if (!is_number(n) || n < 1) {
n = 1
}
return n
}
// Run a single benchmark function
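// Two bench shapes are supported:
//   - a plain function, optionally taking a batch size n
//   - a structured object {setup, run, teardown}, where run takes (n, state)
//     or just (state); setup and teardown are optional
// Illustrative sketch of a bench module (hypothetical contents; the exact
// export syntax of a .cm bench file may differ):
//   return {
//     append: function(n) { /* run the operation n times */ },
//     parse: {
//       setup: function() { return make_input() },  // make_input is hypothetical
//       run: function(n, state) { /* ... */ },
//       teardown: function(state) { /* ... */ }
//     }
//   }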
function run_single_bench(bench_fn, bench_name) {
var timings_per_op = []
var is_structured = is_object(bench_fn) && bench_fn.run
var is_batch = false
var batch_size = 1
var setup_fn = null
var run_fn = null
var teardown_fn = null
var calibrate_fn = null
var _detect = null
var i = 0
var state = null
var start = 0
var duration = 0
var ns_per_op = 0
if (is_structured) {
setup_fn = bench_fn.setup || function() { return null }
run_fn = bench_fn.run
teardown_fn = bench_fn.teardown || function(s) {}
// Check if run function accepts batch size
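// (probe by calling run(1, state); a disruption means run only takes state)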
_detect = function() {
var test_state = setup_fn()
run_fn(1, test_state)
is_batch = true
if (teardown_fn) teardown_fn(test_state)
} disruption {
is_batch = false
}
_detect()
calibrate_fn = function(n) {
var s = setup_fn()
run_fn(n, s)
if (teardown_fn) teardown_fn(s)
}
batch_size = calibrate_batch_size(calibrate_fn, is_batch)
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
} else {
// Simple function format
_detect = function() {
bench_fn(1)
is_batch = true
} disruption {
is_batch = false
}
_detect()
batch_size = calibrate_batch_size(bench_fn, is_batch)
}
if (!batch_size || batch_size < 1) {
batch_size = 1
}
// Warmup phase
for (i = 0; i < WARMUP_BATCHES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
if (is_structured) {
state = setup_fn()
if (is_batch) {
run_fn(batch_size, state)
} else {
run_fn(state)
}
if (teardown_fn) teardown_fn(state)
} else {
if (is_batch) {
bench_fn(batch_size)
} else {
bench_fn()
}
}
}
// Measurement phase - collect SAMPLES timing samples
for (i = 0; i < SAMPLES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
if (is_structured) {
state = setup_fn()
start = os.now()
if (is_batch) {
run_fn(batch_size, state)
} else {
run_fn(state)
}
duration = os.now() - start
if (teardown_fn) teardown_fn(state)
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
} else {
start = os.now()
if (is_batch) {
bench_fn(batch_size)
} else {
bench_fn()
}
duration = os.now() - start
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
}
}
// Calculate statistics
var mean_ns = mean(timings_per_op)
var median_ns = median(timings_per_op)
var min_ns = reduce(timings_per_op, min)
var max_ns = reduce(timings_per_op, max)
var stddev_ns = stddev(timings_per_op, mean_ns)
var p95_ns = percentile(timings_per_op, 95)
var p99_ns = percentile(timings_per_op, 99)
var ops_per_sec = 0
if (median_ns > 0) {
ops_per_sec = floor(1000000000 / median_ns)
}
return {
name: bench_name,
batch_size: batch_size,
samples: SAMPLES,
mean_ns: round(mean_ns),
median_ns: round(median_ns),
min_ns: round(min_ns),
max_ns: round(max_ns),
stddev_ns: round(stddev_ns),
p95_ns: round(p95_ns),
p99_ns: round(p99_ns),
ops_per_sec: ops_per_sec
}
}
// Format nanoseconds for display
function format_ns(ns) {
if (ns < 1000) return `${ns}ns`
if (ns < 1000000) return `${round(ns / 1000 * 100) / 100}µs`
if (ns < 1000000000) return `${round(ns / 1000000 * 100) / 100}ms`
return `${round(ns / 1000000000 * 100) / 100}s`
}
// Format ops/sec for display
function format_ops(ops) {
if (ops < 1000) return `${ops} ops/s`
if (ops < 1000000) return `${round(ops / 1000 * 100) / 100}K ops/s`
if (ops < 1000000000) return `${round(ops / 1000000 * 100) / 100}M ops/s`
return `${round(ops / 1000000000 * 100) / 100}G ops/s`
}
// Load a module for benchmarking in the given mode
// Returns the module value, or null on failure
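// Native mode compiles the source file from its on-disk path; bytecode mode
// resolves the module path through the normal loader.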
function load_bench_module(f, package_name, mode) {
var mod_path = text(f, 0, -3)
var use_pkg = package_name ? package_name : fd.realpath('.')
var prefix = null
var src_path = null
if (mode == "native") {
prefix = testlib.get_pkg_dir(package_name)
src_path = prefix + '/' + f
return shop.use_native(src_path, use_pkg)
}
return shop.use(mod_path, use_pkg)
}
// Collect benchmark functions from a loaded module
function collect_bench_fns(bench_mod) {
var benches = []
if (is_function(bench_mod)) {
push(benches, {name: 'main', fn: bench_mod})
} else if (is_object(bench_mod)) {
arrfor(array(bench_mod), function(k) {
if (is_function(bench_mod[k]))
push(benches, {name: k, fn: bench_mod[k]})
})
}
return benches
}
// Print results for a single benchmark
function print_bench_result(result, label) {
var prefix = label ? `[${label}] ` : ''
log.console(` ${prefix}${format_ns(result.median_ns)}/op ${format_ops(result.ops_per_sec)}`)
log.console(` ${prefix}min: ${format_ns(result.min_ns)} max: ${format_ns(result.max_ns)} stddev: ${format_ns(result.stddev_ns)}`)
if (result.batch_size > 1) {
log.console(` ${prefix}batch: ${result.batch_size} samples: ${result.samples}`)
}
}
// Run benchmarks for a package
function run_benchmarks(package_name, specific_bench) {
var bench_files = collect_benches(package_name, specific_bench)
var pkg_result = {
package: package_name || "local",
files: [],
total: 0
}
if (length(bench_files) == 0) return pkg_result
var mode_label = bench_mode == "compare" ? "bytecode vs native" : bench_mode
if (package_name) log.console(`Running benchmarks for ${package_name} (${mode_label})`)
else log.console(`Running benchmarks for local package (${mode_label})`)
arrfor(bench_files, function(f) {
var load_error = false
var benches = []
var native_benches = []
var bench_mod = null
var native_mod = null
var error_result = null
var file_result = {
name: f,
benchmarks: []
}
var _load_file = function() {
var _load_native = null
if (bench_mode == "compare") {
bench_mod = load_bench_module(f, package_name, "bytecode")
benches = collect_bench_fns(bench_mod)
_load_native = function() {
native_mod = load_bench_module(f, package_name, "native")
native_benches = collect_bench_fns(native_mod)
} disruption {
log.console(` ${f}: native compilation failed, comparison skipped`)
native_benches = []
}
_load_native()
} else {
bench_mod = load_bench_module(f, package_name, bench_mode)
benches = collect_bench_fns(bench_mod)
}
if (length(benches) > 0) {
log.console(` ${f}`)
arrfor(benches, function(b) {
var bench_error = false
var result = null
var nat_b = null
var nat_error = false
var nat_result = null
var _run_bench = function() {
var speedup = 0
var _run_nat = null
result = run_single_bench(b.fn, b.name)
result.package = pkg_result.package
result.mode = bench_mode == "compare" ? "bytecode" : bench_mode
push(file_result.benchmarks, result)
pkg_result.total++
log.console(` ${result.name}`)
if (bench_mode == "compare") {
print_bench_result(result, "bytecode")
// Find matching native bench and run it
nat_b = find(native_benches, function(nb) { return nb.name == b.name })
if (nat_b != null) {
_run_nat = function() {
nat_result = run_single_bench(nat_b.fn, b.name)
nat_result.package = pkg_result.package
nat_result.mode = "native"
push(file_result.benchmarks, nat_result)
pkg_result.total++
print_bench_result(nat_result, "native ")
if (nat_result.median_ns > 0) {
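// speedup = bytecode median / native median; values > 1 mean native is faster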
speedup = result.median_ns / nat_result.median_ns
log.console(` speedup: ${round(speedup * 100) / 100}x`)
}
} disruption {
nat_error = true
}
_run_nat()
if (nat_error) {
log.console(` [native ] ERROR`)
}
} else {
log.console(` [native ] (no matching function)`)
}
} else {
print_bench_result(result, null)
}
} disruption {
bench_error = true
}
_run_bench()
if (bench_error) {
log.console(` ERROR ${b.name}`)
error_result = {
package: pkg_result.package,
name: b.name,
error: "benchmark disrupted"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
}
})
}
} disruption {
load_error = true
}
_load_file()
if (load_error) {
log.console(` Error loading ${f}`)
error_result = {
package: pkg_result.package,
name: "load_module",
error: "error loading module"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
}
if (length(file_result.benchmarks) > 0) {
push(pkg_result.files, file_result)
}
})
return pkg_result
}
// Run all benchmarks
var all_results = []
var packages = null
if (all_pkgs) {
if (testlib.is_valid_package('.')) {
push(all_results, run_benchmarks(null, null))
}
packages = shop.list_packages()
arrfor(packages, function(p) {
push(all_results, run_benchmarks(p, null))
})
} else {
push(all_results, run_benchmarks(target_pkg, target_bench))
}
// Calculate totals
var total_benches = 0
arrfor(all_results, function(result) {
total_benches += result.total
})
log.console(`----------------------------------------`)
log.console(`Benchmarks: ${total_benches} total`)
// Generate reports
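// Writes a plain-text summary (bench.txt) plus one <package>.json per
// package under <reports>/bench_<timestamp>/.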
function generate_reports() {
var timestamp = text(floor(time.number()))
var report_dir = shop.get_reports_dir() + '/bench_' + timestamp
testlib.ensure_dir(report_dir)
var mode_str = bench_mode == "compare" ? "bytecode vs native" : bench_mode
var txt_report = `BENCHMARK REPORT
Date: ${time.text(time.number())}
Mode: ${mode_str}
Total benchmarks: ${total_benches}
=== SUMMARY ===
`
arrfor(all_results, function(pkg_res) {
if (pkg_res.total == 0) return
txt_report += `Package: ${pkg_res.package}\n`
arrfor(pkg_res.files, function(f) {
txt_report += ` ${f.name}\n`
arrfor(f.benchmarks, function(b) {
var mode_tag = b.mode ? ` [${b.mode}]` : ''
if (b.error) {
txt_report += ` ERROR ${b.name}: ${b.error}\n`
} else {
txt_report += ` ${b.name}${mode_tag}: ${format_ns(b.median_ns)}/op (${format_ops(b.ops_per_sec)})\n`
}
})
})
})
txt_report += `\n=== DETAILED RESULTS ===\n`
arrfor(all_results, function(pkg_res) {
if (pkg_res.total == 0) return
arrfor(pkg_res.files, function(f) {
arrfor(f.benchmarks, function(b) {
if (b.error) return
var detail_mode = b.mode ? ` [${b.mode}]` : ''
txt_report += `\n${pkg_res.package}::${b.name}${detail_mode}\n`
txt_report += ` batch_size: ${b.batch_size} samples: ${b.samples}\n`
txt_report += ` median: ${format_ns(b.median_ns)}/op\n`
txt_report += ` mean: ${format_ns(b.mean_ns)}/op\n`
txt_report += ` min: ${format_ns(b.min_ns)}\n`
txt_report += ` max: ${format_ns(b.max_ns)}\n`
txt_report += ` stddev: ${format_ns(b.stddev_ns)}\n`
txt_report += ` p95: ${format_ns(b.p95_ns)}\n`
txt_report += ` p99: ${format_ns(b.p99_ns)}\n`
txt_report += ` ops/s: ${format_ops(b.ops_per_sec)}\n`
})
})
})
fd.slurpwrite(`${report_dir}/bench.txt`, stone(blob(txt_report)))
log.console(`Report written to ${report_dir}/bench.txt`)
// Generate JSON per package
arrfor(all_results, function(pkg_res) {
if (pkg_res.total == 0) return
var pkg_benches = []
arrfor(pkg_res.files, function(f) {
arrfor(f.benchmarks, function(benchmark) {
push(pkg_benches, benchmark)
})
})
var json_path = `${report_dir}/${replace(pkg_res.package, /\//, '_')}.json`
fd.slurpwrite(json_path, stone(blob(json.encode(pkg_benches))))
})
}
generate_reports()
$stop()