// cell/bench.ce: benchmark runner for cell packages
var shop = use('internal/shop')
var pkg = use('package')
var fd = use('fd')
var time = use('time')
var json = use('json')
var blob = use('blob')
var os = use('os')
var testlib = use('internal/testlib')
var math = use('math/radians')
var _args = args == null ? [] : args
var target_pkg = null // null = current package
var target_bench = null // null = all benchmarks, otherwise specific bench file
var all_pkgs = false
var bench_mode = "bytecode" // "bytecode", "native", or "compare"
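// Usage (as implemented by parse_args below):
//   cell bench                         benchmark the current package
//   cell bench all                     same as above
//   cell bench <path>                  run one bench file (benches/ prefix added if needed)
//   cell bench package <name> [bench]  benchmark a named package
//   cell bench package all             benchmark every installed package
// Mode flags (stripped before parsing): --bytecode (default), --native, --compare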
// Strip mode flags from args before parsing
function strip_mode_flags() {
var filtered = []
arrfor(_args, function(a) {
if (a == '--native') {
bench_mode = "native"
} else if (a == '--bytecode') {
bench_mode = "bytecode"
} else if (a == '--compare') {
bench_mode = "compare"
} else {
push(filtered, a)
}
})
_args = filtered
}
strip_mode_flags()
// Benchmark configuration
def WARMUP_BATCHES = 3
def SAMPLES = 11 // Number of timing samples to collect
def TARGET_SAMPLE_NS = 20000000 // 20ms per sample (fast mode)
def MIN_SAMPLE_NS = 2000000 // 2ms minimum sample duration
def MIN_BATCH_SIZE = 1
def MAX_BATCH_SIZE = 100000000 // 100M iterations max per batch
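// Strategy: each benchmark is warmed up for WARMUP_BATCHES runs, then timed
// SAMPLES times. Batched benchmarks are calibrated so a single sample runs
// for roughly TARGET_SAMPLE_NS; per-op figures divide by the batch size.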
// Statistical functions
function median(arr) {
if (length(arr) == 0) return 0
var sorted = sort(arr)
var mid = floor(length(arr) / 2)
if (length(arr) % 2 == 0) {
return (sorted[mid - 1] + sorted[mid]) / 2
}
return sorted[mid]
}
function mean(arr) {
if (length(arr) == 0) return 0
var sum = 0
arrfor(arr, function(val) {
sum += val
})
return sum / length(arr)
}
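// Sample standard deviation (n - 1 denominator, Bessel's correction)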
function stddev(arr, mean_val) {
if (length(arr) < 2) return 0
var sum_sq_diff = 0
arrfor(arr, function(val) {
var diff = val - mean_val
sum_sq_diff += diff * diff
})
return math.sqrt(sum_sq_diff / (length(arr) - 1))
}
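// Nearest-rank percentile over the sorted samples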
function percentile(arr, p) {
if (length(arr) == 0) return 0
var sorted = sort(arr)
var idx = floor(length(arr) * p / 100)
if (idx >= length(arr)) idx = length(arr) - 1
return sorted[idx]
}
// Parse arguments similar to test.ce
function parse_args() {
var name = null
var lock = null
var resolved = null
var bench_path = null
if (length(_args) == 0) {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
}
target_pkg = null
return true
}
if (_args[0] == 'all') {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
}
target_pkg = null
return true
}
if (_args[0] == 'package') {
if (length(_args) < 2) {
log.console('Usage: cell bench package <name> [bench]')
log.console('       cell bench package all')
return false
}
if (_args[1] == 'all') {
all_pkgs = true
log.console('Benchmarking all packages...')
return true
}
name = _args[1]
lock = shop.load_lock()
if (lock[name]) {
target_pkg = name
} else if (starts_with(name, '/') && testlib.is_valid_package(name)) {
target_pkg = name
} else {
if (testlib.is_valid_package('.')) {
resolved = pkg.alias_to_package(null, name)
if (resolved) {
target_pkg = resolved
} else {
log.console(`Package not found: ${name}`)
return false
}
} else {
log.console(`Package not found: ${name}`)
return false
}
}
if (length(_args) >= 3) {
target_bench = _args[2]
}
log.console(`Benchmarking package: ${target_pkg}`)
return true
}
// cell bench benches/suite or cell bench <path>
bench_path = _args[0]
// Normalize path: if the bare path does not resolve but benches/<path> does, add the benches/ prefix
if (!starts_with(bench_path, 'benches/') && !starts_with(bench_path, '/')) {
if (!fd.is_file(bench_path + '.cm') && !fd.is_file(bench_path)) {
if (fd.is_file('benches/' + bench_path + '.cm') || fd.is_file('benches/' + bench_path)) {
bench_path = 'benches/' + bench_path
}
}
}
target_bench = bench_path
target_pkg = null
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
}
return true
}
if (!parse_args()) {
$stop()
return
}
// Collect benchmark files from a package
function collect_benches(package_name, specific_bench) {
var prefix = testlib.get_pkg_dir(package_name)
var benches_dir = prefix + '/benches'
if (!fd.is_dir(benches_dir)) return []
var files = pkg.list_files(package_name)
var bench_files = []
arrfor(files, function(f) {
var bench_name = null
var match_name = null
var match_base = null
if (starts_with(f, "benches/") && ends_with(f, ".cm")) {
if (specific_bench) {
bench_name = text(f, 0, -3)
match_name = specific_bench
if (!starts_with(match_name, 'benches/')) match_name = 'benches/' + match_name
match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
if (bench_name != match_base) return
}
push(bench_files, f)
}
})
return bench_files
}
// Calibrate batch size for a benchmark
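// Doubles the batch size until one batch runs for at least MIN_SAMPLE_NS,
// then scales it linearly toward TARGET_SAMPLE_NS, clamped to
// [MIN_BATCH_SIZE, MAX_BATCH_SIZE]. Non-batch benchmarks always use 1.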
function calibrate_batch_size(bench_fn, is_batch) {
if (!is_batch) return 1
var n = MIN_BATCH_SIZE
var dt = 0
var start = 0
var new_n = 0
var calc = 0
var target_n = 0
// Find a batch size that takes at least MIN_SAMPLE_NS
while (n < MAX_BATCH_SIZE) {
if (!is_number(n) || n < 1) {
n = 1
break
}
start = os.now()
bench_fn(n)
dt = os.now() - start
if (dt >= MIN_SAMPLE_NS) break
new_n = n * 2
if (!is_number(new_n) || new_n > MAX_BATCH_SIZE) {
n = MAX_BATCH_SIZE
break
}
n = new_n
}
// Adjust to target sample duration
if (dt > 0 && dt < TARGET_SAMPLE_NS && is_number(n) && is_number(dt)) {
calc = n * TARGET_SAMPLE_NS / dt
if (is_number(calc) && calc > 0) {
target_n = floor(calc)
if (is_number(target_n) && target_n > 0) {
if (target_n > MAX_BATCH_SIZE) target_n = MAX_BATCH_SIZE
if (target_n < MIN_BATCH_SIZE) target_n = MIN_BATCH_SIZE
n = target_n
}
}
}
if (!is_number(n) || n < 1) {
n = 1
}
return n
}
// Run a single benchmark function
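// Two bench shapes are supported:
//   - a plain function, optionally taking a batch size n
//   - a structured object {setup, run, teardown}, where run takes (n, state)
//     or just (state); setup and teardown are optional
// Illustrative sketch of a bench module (hypothetical contents; the exact
// export syntax of a .cm bench file may differ):
//   return {
//     append: function(n) { /* run the operation n times */ },
//     parse: {
//       setup: function() { return make_input() },  // make_input is hypothetical
//       run: function(n, state) { /* ... */ },
//       teardown: function(state) { /* ... */ }
//     }
//   }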
function run_single_bench(bench_fn, bench_name) {
var timings_per_op = []
var is_structured = is_object(bench_fn) && bench_fn.run
var is_batch = false
var batch_size = 1
var setup_fn = null
var run_fn = null
var teardown_fn = null
var calibrate_fn = null
var _detect = null
var i = 0
var state = null
var start = 0
var duration = 0
var ns_per_op = 0
if (is_structured) {
setup_fn = bench_fn.setup || function() { return null }
run_fn = bench_fn.run
teardown_fn = bench_fn.teardown || function(s) {}
// Check if run function accepts batch size
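// (probe by calling run(1, state); a disruption means run only takes state)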
_detect = function() {
var test_state = setup_fn()
run_fn(1, test_state)
is_batch = true
if (teardown_fn) teardown_fn(test_state)
} disruption {
is_batch = false
}
_detect()
calibrate_fn = function(n) {
var s = setup_fn()
run_fn(n, s)
if (teardown_fn) teardown_fn(s)
}
batch_size = calibrate_batch_size(calibrate_fn, is_batch)
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
} else {
// Simple function format
_detect = function() {
bench_fn(1)
is_batch = true
} disruption {
is_batch = false
}
_detect()
batch_size = calibrate_batch_size(bench_fn, is_batch)
}
if (!batch_size || batch_size < 1) {
batch_size = 1
}
// Warmup phase
for (i = 0; i < WARMUP_BATCHES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
if (is_structured) {
state = setup_fn()
if (is_batch) {
run_fn(batch_size, state)
} else {
run_fn(state)
}
if (teardown_fn) teardown_fn(state)
} else {
if (is_batch) {
bench_fn(batch_size)
} else {
bench_fn()
}
}
}
// Measurement phase - collect SAMPLES timing samples
for (i = 0; i < SAMPLES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
if (is_structured) {
state = setup_fn()
start = os.now()
if (is_batch) {
run_fn(batch_size, state)
} else {
run_fn(state)
}
duration = os.now() - start
if (teardown_fn) teardown_fn(state)
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
} else {
start = os.now()
if (is_batch) {
bench_fn(batch_size)
} else {
bench_fn()
}
duration = os.now() - start
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
}
}
// Calculate statistics
var mean_ns = mean(timings_per_op)
var median_ns = median(timings_per_op)
var min_ns = reduce(timings_per_op, min)
var max_ns = reduce(timings_per_op, max)
var stddev_ns = stddev(timings_per_op, mean_ns)
var p95_ns = percentile(timings_per_op, 95)
var p99_ns = percentile(timings_per_op, 99)
var ops_per_sec = 0
if (median_ns > 0) {
ops_per_sec = floor(1000000000 / median_ns)
}
return {
name: bench_name,
batch_size: batch_size,
samples: SAMPLES,
mean_ns: round(mean_ns),
median_ns: round(median_ns),
min_ns: round(min_ns),
max_ns: round(max_ns),
stddev_ns: round(stddev_ns),
p95_ns: round(p95_ns),
p99_ns: round(p99_ns),
ops_per_sec: ops_per_sec
}
}
// Format nanoseconds for display
function format_ns(ns) {
if (ns < 1000) return `${ns}ns`
if (ns < 1000000) return `${round(ns / 1000 * 100) / 100}µs`
if (ns < 1000000000) return `${round(ns / 1000000 * 100) / 100}ms`
return `${round(ns / 1000000000 * 100) / 100}s`
}
// Format ops/sec for display
function format_ops(ops) {
if (ops < 1000) return `${ops} ops/s`
if (ops < 1000000) return `${round(ops / 1000 * 100) / 100}K ops/s`
if (ops < 1000000000) return `${round(ops / 1000000 * 100) / 100}M ops/s`
return `${round(ops / 1000000000 * 100) / 100}G ops/s`
}
// Load a module for benchmarking in the given mode
// Returns the module value, or null on failure
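// Native mode compiles the source file from its on-disk path; bytecode mode
// resolves the module path through the normal loader.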
function load_bench_module(f, package_name, mode) {
var mod_path = text(f, 0, -3)
var use_pkg = package_name ? package_name : fd.realpath('.')
var prefix = null
var src_path = null
if (mode == "native") {
prefix = testlib.get_pkg_dir(package_name)
src_path = prefix + '/' + f
return shop.use_native(src_path, use_pkg)
}
return shop.use(mod_path, use_pkg)
}
// Collect benchmark functions from a loaded module
function collect_bench_fns(bench_mod) {
var benches = []
if (is_function(bench_mod)) {
push(benches, {name: 'main', fn: bench_mod})
} else if (is_object(bench_mod)) {
arrfor(array(bench_mod), function(k) {
if (is_function(bench_mod[k]))
push(benches, {name: k, fn: bench_mod[k]})
})
}
return benches
}
// Print results for a single benchmark
function print_bench_result(result, label) {
var prefix = label ? `[${label}] ` : ''
log.console(` ${prefix}${format_ns(result.median_ns)}/op ${format_ops(result.ops_per_sec)}`)
log.console(` ${prefix}min: ${format_ns(result.min_ns)} max: ${format_ns(result.max_ns)} stddev: ${format_ns(result.stddev_ns)}`)
if (result.batch_size > 1) {
log.console(` ${prefix}batch: ${result.batch_size} samples: ${result.samples}`)
}
}
// Run benchmarks for a package
function run_benchmarks(package_name, specific_bench) {
var bench_files = collect_benches(package_name, specific_bench)
var pkg_result = {
package: package_name || "local",
files: [],
total: 0
}
if (length(bench_files) == 0) return pkg_result
var mode_label = bench_mode == "compare" ? "bytecode vs native" : bench_mode
if (package_name) log.console(`Running benchmarks for ${package_name} (${mode_label})`)
else log.console(`Running benchmarks for local package (${mode_label})`)
arrfor(bench_files, function(f) {
var load_error = false
var benches = []
var native_benches = []
var bench_mod = null
var native_mod = null
var error_result = null
var file_result = {
name: f,
benchmarks: []
}
var _load_file = function() {
var _load_native = null
if (bench_mode == "compare") {
bench_mod = load_bench_module(f, package_name, "bytecode")
benches = collect_bench_fns(bench_mod)
_load_native = function() {
native_mod = load_bench_module(f, package_name, "native")
native_benches = collect_bench_fns(native_mod)
} disruption {
log.console(` ${f}: native compilation failed, comparison skipped`)
native_benches = []
}
_load_native()
} else {
bench_mod = load_bench_module(f, package_name, bench_mode)
benches = collect_bench_fns(bench_mod)
}
if (length(benches) > 0) {
log.console(` ${f}`)
arrfor(benches, function(b) {
var bench_error = false
var result = null
var nat_b = null
var nat_error = false
var nat_result = null
var _run_bench = function() {
var speedup = 0
var _run_nat = null
result = run_single_bench(b.fn, b.name)
result.package = pkg_result.package
result.mode = bench_mode == "compare" ? "bytecode" : bench_mode
push(file_result.benchmarks, result)
pkg_result.total++
log.console(` ${result.name}`)
if (bench_mode == "compare") {
print_bench_result(result, "bytecode")
// Find matching native bench and run it
nat_b = find(native_benches, function(nb) { return nb.name == b.name })
if (nat_b != null) {
_run_nat = function() {
nat_result = run_single_bench(nat_b.fn, b.name)
nat_result.package = pkg_result.package
nat_result.mode = "native"
push(file_result.benchmarks, nat_result)
pkg_result.total++
print_bench_result(nat_result, "native ")
if (nat_result.median_ns > 0) {
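// speedup = bytecode median / native median; values > 1 mean native is faster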
speedup = result.median_ns / nat_result.median_ns
log.console(` speedup: ${round(speedup * 100) / 100}x`)
}
} disruption {
nat_error = true
}
_run_nat()
if (nat_error) {
log.console(` [native ] ERROR`)
}
} else {
log.console(` [native ] (no matching function)`)
}
} else {
print_bench_result(result, null)
}
} disruption {
bench_error = true
}
_run_bench()
if (bench_error) {
log.console(` ERROR ${b.name}`)
error_result = {
package: pkg_result.package,
name: b.name,
error: "benchmark disrupted"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
}
})
}
} disruption {
load_error = true
}
_load_file()
if (load_error) {
log.console(` Error loading ${f}`)
error_result = {
package: pkg_result.package,
name: "load_module",
error: "error loading module"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
}
if (length(file_result.benchmarks) > 0) {
push(pkg_result.files, file_result)
}
})
return pkg_result
}
// Run all benchmarks
var all_results = []
var packages = null
if (all_pkgs) {
if (testlib.is_valid_package('.')) {
push(all_results, run_benchmarks(null, null))
}
packages = shop.list_packages()
arrfor(packages, function(p) {
push(all_results, run_benchmarks(p, null))
})
} else {
push(all_results, run_benchmarks(target_pkg, target_bench))
}
// Calculate totals
var total_benches = 0
arrfor(all_results, function(result) {
total_benches += result.total
})
log.console(`----------------------------------------`)
log.console(`Benchmarks: ${total_benches} total`)
// Generate reports
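// Writes a plain-text summary (bench.txt) plus one <package>.json per
// package under <reports>/bench_<timestamp>/.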
function generate_reports() {
var timestamp = text(floor(time.number()))
var report_dir = shop.get_reports_dir() + '/bench_' + timestamp
testlib.ensure_dir(report_dir)
var mode_str = bench_mode == "compare" ? "bytecode vs native" : bench_mode
var txt_report = `BENCHMARK REPORT
Date: ${time.text(time.number())}
Mode: ${mode_str}
Total benchmarks: ${total_benches}
=== SUMMARY ===
`
arrfor(all_results, function(pkg_res) {
if (pkg_res.total == 0) return
txt_report += `Package: ${pkg_res.package}\n`
arrfor(pkg_res.files, function(f) {
txt_report += ` ${f.name}\n`
arrfor(f.benchmarks, function(b) {
var mode_tag = b.mode ? ` [${b.mode}]` : ''
if (b.error) {
txt_report += ` ERROR ${b.name}: ${b.error}\n`
} else {
txt_report += ` ${b.name}${mode_tag}: ${format_ns(b.median_ns)}/op (${format_ops(b.ops_per_sec)})\n`
}
})
})
})
txt_report += `\n=== DETAILED RESULTS ===\n`
arrfor(all_results, function(pkg_res) {
if (pkg_res.total == 0) return
arrfor(pkg_res.files, function(f) {
arrfor(f.benchmarks, function(b) {
if (b.error) return
var detail_mode = b.mode ? ` [${b.mode}]` : ''
txt_report += `\n${pkg_res.package}::${b.name}${detail_mode}\n`
txt_report += ` batch_size: ${b.batch_size} samples: ${b.samples}\n`
txt_report += ` median: ${format_ns(b.median_ns)}/op\n`
txt_report += ` mean: ${format_ns(b.mean_ns)}/op\n`
txt_report += ` min: ${format_ns(b.min_ns)}\n`
txt_report += ` max: ${format_ns(b.max_ns)}\n`
txt_report += ` stddev: ${format_ns(b.stddev_ns)}\n`
txt_report += ` p95: ${format_ns(b.p95_ns)}\n`
txt_report += ` p99: ${format_ns(b.p99_ns)}\n`
txt_report += ` ops/s: ${format_ops(b.ops_per_sec)}\n`
})
})
})
fd.slurpwrite(`${report_dir}/bench.txt`, stone(blob(txt_report)))
log.console(`Report written to ${report_dir}/bench.txt`)
// Generate JSON per package
arrfor(all_results, function(pkg_res) {
if (pkg_res.total == 0) return
var pkg_benches = []
arrfor(pkg_res.files, function(f) {
arrfor(f.benchmarks, function(benchmark) {
push(pkg_benches, benchmark)
})
})
var json_path = `${report_dir}/${replace(pkg_res.package, /\//, '_')}.json`
fd.slurpwrite(json_path, stone(blob(json.encode(pkg_benches))))
})
}
generate_reports()
$stop()