bench now compares aot

2026-02-17 13:42:36 -06:00
parent ad26e71ad1
commit 78051e24f3
1 changed files with 121 additions and 21 deletions
--- a/bench.ce
+++ b/bench.ce
@@ -13,6 +13,25 @@ var _args = args == null ? [] : args
 var target_pkg = null // null = current package
 var target_bench = null // null = all benchmarks, otherwise specific bench file
 var all_pkgs = false
+var bench_mode = "bytecode" // "bytecode", "native", or "compare"; overridden by the --bytecode / --native / --compare flags
+
+// Pull the mode flags (--native, --bytecode, --compare) out of _args before
+// the remaining arguments are parsed. Each mode flag encountered overwrites
+// bench_mode; every other argument is kept, in its original order.
+function strip_mode_flags() {
+  var kept = []
+  arrfor(_args, function(arg) {
+    if (arg == '--native') {
+      bench_mode = "native"
+      return
+    }
+    if (arg == '--bytecode') {
+      bench_mode = "bytecode"
+      return
+    }
+    if (arg == '--compare') {
+      bench_mode = "compare"
+      return
+    }
+    // Not a mode flag: pass it through untouched
+    push(kept, arg)
+  })
+  _args = kept
+}
+strip_mode_flags()

 // Benchmark configuration
 def WARMUP_BATCHES = 3
@@ -394,6 +413,45 @@ function format_ops(ops) {
  return `${round(ops / 1000000000 * 100) / 100}G ops/s`
 }

+// Load one benchmark file as a module, either through shop.use (any mode
+// other than "native") or through shop.use_native (mode == "native").
+//   f            - benchmark file name; the native path uses it as-is, the
+//                  other path uses it with its last three characters removed
+//                  (presumably the file extension - confirm)
+//   package_name - owning package; when falsy, fd.realpath('.') is used as
+//                  the package passed to the loader
+//   mode         - "native" selects shop.use_native, anything else shop.use
+// Returns whatever the selected loader returns. No errors are handled here;
+// a failing load is left to the caller.
+function load_bench_module(f, package_name, mode) {
+  var bytecode_path = text(f, 0, -3)
+  var owner_pkg = package_name ? package_name : fd.realpath('.')
+  var pkg_dir = null
+  if (mode == "native") {
+    // Native loading takes the source file path inside the package directory
+    pkg_dir = testlib.get_pkg_dir(package_name)
+    return shop.use_native(pkg_dir + '/' + f, owner_pkg)
+  }
+  return shop.use(bytecode_path, owner_pkg)
+}
+
+// Build the list of runnable benchmarks exposed by a loaded module.
+//   bench_mod - the value returned by the module loader
+// Returns an array of {name, fn} records:
+//   - a module that is itself a function yields a single entry named 'main'
+//   - a module object yields one entry per function-valued member
+//   - anything else yields an empty array
+function collect_bench_fns(bench_mod) {
+  var found = []
+  if (is_function(bench_mod)) {
+    push(found, {name: 'main', fn: bench_mod})
+    return found
+  }
+  if (is_object(bench_mod)) {
+    arrfor(array(bench_mod), function(key) {
+      var member = bench_mod[key]
+      // Non-function members are ignored
+      if (is_function(member)) push(found, {name: key, fn: member})
+    })
+  }
+  return found
+}
+
+// Log the timing summary of one benchmark result to the console.
+//   result - record providing median_ns, ops_per_sec, min_ns, max_ns,
+//            stddev_ns, batch_size and samples
+//   label  - optional tag; when truthy, every line is prefixed with "[label] "
+// The batch/samples line is printed only when batch_size exceeds 1.
+function print_bench_result(result, label) {
+  var prefix = ''
+  if (label) {
+    prefix = `[${label}] `
+  }
+  log.console(`    ${prefix}${format_ns(result.median_ns)}/op  ${format_ops(result.ops_per_sec)}`)
+  log.console(`    ${prefix}min: ${format_ns(result.min_ns)}  max: ${format_ns(result.max_ns)}  stddev: ${format_ns(result.stddev_ns)}`)
+  if (result.batch_size > 1) log.console(`    ${prefix}batch: ${result.batch_size}  samples: ${result.samples}`)
+}
+
 // Run benchmarks for a package
 function run_benchmarks(package_name, specific_bench) {
  var bench_files = collect_benches(package_name, specific_bench)
@@ -406,15 +464,16 @@ function run_benchmarks(package_name, specific_bench) {

  if (length(bench_files) == 0) return pkg_result

-  if (package_name) log.console(`Running benchmarks for ${package_name}`)
-  else log.console(`Running benchmarks for local package`)
+  var mode_label = bench_mode == "compare" ? "bytecode vs native" : bench_mode
+  if (package_name) log.console(`Running benchmarks for ${package_name} (${mode_label})`)
+  else log.console(`Running benchmarks for local package (${mode_label})`)

  arrfor(bench_files, function(f) {
-    var mod_path = text(f, 0, -3)
    var load_error = false
-    var bench_mod = null
-    var use_pkg = null
    var benches = []
+    var native_benches = []
+    var bench_mod = null
+    var native_mod = null
    var error_result = null

    var file_result = {
@@ -423,16 +482,21 @@ function run_benchmarks(package_name, specific_bench) {
    }

    var _load_file = function() {
-      use_pkg = package_name ? package_name : fd.realpath('.')
-      bench_mod = shop.use(mod_path, use_pkg)
-
-      if (is_function(bench_mod)) {
-        push(benches, {name: 'main', fn: bench_mod})
-      } else if (is_object(bench_mod)) {
-        arrfor(array(bench_mod), function(k) {
-          if (is_function(bench_mod[k]))
-            push(benches, {name: k, fn: bench_mod[k]})
-        })
+      var _load_native = null
+      if (bench_mode == "compare") {
+        bench_mod = load_bench_module(f, package_name, "bytecode")
+        benches = collect_bench_fns(bench_mod)
+        _load_native = function() {
+          native_mod = load_bench_module(f, package_name, "native")
+          native_benches = collect_bench_fns(native_mod)
+        } disruption {
+          log.console(`  ${f}: native compilation failed, comparing skipped`)
+          native_benches = []
+        }
+        _load_native()
+      } else {
+        bench_mod = load_bench_module(f, package_name, bench_mode)
+        benches = collect_bench_fns(bench_mod)
      }

      if (length(benches) > 0) {
@@ -440,18 +504,50 @@ function run_benchmarks(package_name, specific_bench) {
        arrfor(benches, function(b) {
          var bench_error = false
          var result = null
+          var nat_b = null
+          var nat_error = false
+          var nat_result = null

          var _run_bench = function() {
+            var speedup = 0
+            var _run_nat = null
            result = run_single_bench(b.fn, b.name)
            result.package = pkg_result.package
+            result.mode = bench_mode == "compare" ? "bytecode" : bench_mode
            push(file_result.benchmarks, result)
            pkg_result.total++

            log.console(`    ${result.name}`)
-            log.console(`      ${format_ns(result.median_ns)}/op  ${format_ops(result.ops_per_sec)}`)
-            log.console(`      min: ${format_ns(result.min_ns)}  max: ${format_ns(result.max_ns)}  stddev: ${format_ns(result.stddev_ns)}`)
-            if (result.batch_size > 1) {
-              log.console(`      batch: ${result.batch_size}  samples: ${result.samples}`)
+            if (bench_mode == "compare") {
+              print_bench_result(result, "bytecode")
+
+              // Find matching native bench and run it
+              nat_b = find(native_benches, function(nb) { return nb.name == b.name })
+              if (nat_b) {
+                _run_nat = function() {
+                  nat_result = run_single_bench(nat_b.fn, b.name)
+                  nat_result.package = pkg_result.package
+                  nat_result.mode = "native"
+                  push(file_result.benchmarks, nat_result)
+                  pkg_result.total++
+                  print_bench_result(nat_result, "native ")
+
+                  if (nat_result.median_ns > 0) {
+                    speedup = result.median_ns / nat_result.median_ns
+                    log.console(`    speedup: ${round(speedup * 100) / 100}x`)
+                  }
+                } disruption {
+                  nat_error = true
+                }
+                _run_nat()
+                if (nat_error) {
+                  log.console(`    [native ] ERROR`)
+                }
+              } else {
+                log.console(`    [native ] (no matching function)`)
+              }
+            } else {
+              print_bench_result(result, null)
            }
          } disruption {
            bench_error = true
@@ -524,8 +620,10 @@ function generate_reports() {
  var report_dir = shop.get_reports_dir() + '/bench_' + timestamp
  testlib.ensure_dir(report_dir)

+  var mode_str = bench_mode == "compare" ? "bytecode vs native" : bench_mode
  var txt_report = `BENCHMARK REPORT
 Date: ${time.text(time.number())}
+Mode: ${mode_str}
 Total benchmarks: ${total_benches}

 === SUMMARY ===
@@ -536,10 +634,11 @@ Total benchmarks: ${total_benches}
    arrfor(pkg_res.files, function(f) {
      txt_report += `  ${f.name}\n`
      arrfor(f.benchmarks, function(b) {
+        var mode_tag = b.mode ? ` [${b.mode}]` : ''
        if (b.error) {
          txt_report += `    ERROR ${b.name}: ${b.error}\n`
        } else {
-          txt_report += `    ${b.name}: ${format_ns(b.median_ns)}/op (${format_ops(b.ops_per_sec)})\n`
+          txt_report += `    ${b.name}${mode_tag}: ${format_ns(b.median_ns)}/op (${format_ops(b.ops_per_sec)})\n`
        }
      })
    })
@@ -553,7 +652,8 @@ Total benchmarks: ${total_benches}
      arrfor(f.benchmarks, function(b) {
        if (b.error) return

-        txt_report += `\n${pkg_res.package}::${b.name}\n`
+        var detail_mode = b.mode ? ` [${b.mode}]` : ''
+        txt_report += `\n${pkg_res.package}::${b.name}${detail_mode}\n`
        txt_report += `  batch_size: ${b.batch_size}  samples: ${b.samples}\n`
        txt_report += `  median: ${format_ns(b.median_ns)}/op\n`
        txt_report += `  mean: ${format_ns(b.mean_ns)}/op\n`