optimize frames; remove trampoline

2026-02-18 22:37:48 -06:00
parent 27ca008f18
commit e004b2c472
14 changed files with 318 additions and 91 deletions
--- a/build.cm
+++ b/build.cm
@@ -81,7 +81,7 @@ function content_hash(str) {
 }

 // Bump when native codegen/runtime ABI changes so stale dylibs are not reused.
-def NATIVE_CACHE_VERSION = "native-v22"
+def NATIVE_CACHE_VERSION = "native-v23"

 // Enable AOT ASan by creating .cell/asan_aot in the package root.
 function native_sanitize_flags() {
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -168,6 +168,9 @@ pit bench <suite>                  # run specific benchmark file
 pit bench package <name>           # benchmark a named package
 pit bench package <name> <suite>   # specific benchmark in a package
 pit bench package all              # benchmark all packages
+pit bench --bytecode <suite>       # force bytecode-only benchmark run
+pit bench --native <suite>         # force native-only benchmark run
+pit bench --compare <suite>        # run bytecode and native side-by-side
 ```

 Output includes median, mean, standard deviation, and percentiles for each benchmark.
--- a/docs/shop.md
+++ b/docs/shop.md
@@ -37,7 +37,12 @@ On a cache miss, the C runtime loads `boot/bootstrap.cm.mcode` (a pre-compiled s

 ### Cache invalidation

-All caching is content-addressed by BLAKE2 hash of the source. When any source file changes, its hash changes and the old cache entry is simply never looked up again. No manual invalidation is needed. To force a full rebuild, delete `~/.pit/build/`.
+Caching is content-addressed by BLAKE2 over the relevant inputs for each artifact.
+
+- Mach/script cache keys are source-content based.
+- Native (`.dylib`) cache keys include source, host target, native mode marker, native cache version, and sanitize flags.
+
+When inputs change, the old cache entry is simply never looked up again. To force a full rebuild, delete `~/.pit/build/` (or run `cell --dev clean shop --build` in a dev workspace).

 ## Module Resolution

@@ -73,7 +78,7 @@ use('gitea.pockle.world/john/renderer/sprite')

 ## Compilation and Caching

-Every module goes through a content-addressed caching pipeline. The cache key is the BLAKE2 hash of the source content, so changing the source automatically invalidates the cache.
+Every module goes through a content-addressed caching pipeline. Cache keys are based on the inputs that affect the output artifact, so changing any relevant input automatically invalidates the cache.

 ### Cache Hierarchy

@@ -104,7 +109,9 @@ The build cache at `~/.pit/build/` stores ephemeral artifacts named by the BLAKE
 └── f3a4b5c6...          # compiled dylib (checked before copying to lib/)
 ```

-This scheme provides automatic cache invalidation: when source changes, its hash changes, and the old cache entry is simply never looked up again. When building a dylib, the build cache is checked first — if a matching hash exists, it is copied to `lib/` without recompiling.
+This scheme provides automatic cache invalidation: when an input changes, its hash changes, and the old cache entry is simply never looked up again. For native dylibs, inputs include target and native cache version in addition to source.
+
+When native codegen/runtime ABI changes, bump `NATIVE_CACHE_VERSION` in both `build.cm` and `internal/shop.cm` so stale native artifacts are never reused.

 ### Core Module Caching

--- a/docs/testing.md
+++ b/docs/testing.md
@@ -118,6 +118,45 @@ When a mismatch is found:
    MISMATCH: test_foo: result mismatch opt=42 noopt=43
 ```

+## ASAN for Native AOT
+
+When debugging native (`shop.use_native`) crashes, there are two useful sanitizer workflows.
+
+### 1) AOT-only sanitizer (fastest loop)
+
+Enable sanitizer flags for generated native modules by creating a marker file:
+
+```bash
+touch .cell/asan_aot
+cell --dev bench --native fibonacci
+```
+
+This adds `-fsanitize=address -fno-omit-frame-pointer` to AOT module compilation.
+
+Disable it with:
+
+```bash
+rm -f .cell/asan_aot
+```
+
+### 2) Full runtime sanitizer (CLI + runtime + AOT)
+
+Build an ASAN-instrumented `cell` binary:
+
+```bash
+meson setup build-asan -Dbuildtype=debug -Db_sanitize=address
+CCACHE_DISABLE=1 meson compile -C build-asan
+ASAN_OPTIONS=abort_on_error=1:detect_leaks=0 ./build-asan/cell --dev bench --native fibonacci
+```
+
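+This also catches memory errors that cross the boundary between generated dylibs and the runtime helpers they call, which the AOT-only workflow (it instruments only the generated modules) can miss.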
+
+If stale native artifacts are suspected after compiler/runtime changes, clear build outputs first:
+
+```bash
+cell --dev clean shop --build
+```
+
 ## Fuzz Testing

 The fuzzer generates random self-checking programs, compiles them, and runs them through both optimized and unoptimized paths. Each generated program contains test functions that validate their own expected results, so failures catch both correctness bugs and optimizer mismatches.
--- a/fold.cm
+++ b/fold.cm
@@ -458,7 +458,7 @@ var fold = function(ast) {
        else if (k == "-") result = lv - rv
        else if (k == "*") result = lv * rv
        else if (k == "/") result = lv / rv
-        else if (k == "%") result = lv % rv
+        else if (k == "%") result = lv - (trunc(lv / rv) * rv)
        else if (k == "**") result = lv ** rv
        if (result == null) return make_null(expr)
        return make_number(result, expr)
--- a/internal/bootstrap.cm
+++ b/internal/bootstrap.cm
@@ -11,7 +11,9 @@ var json_mod = use_embed('json')
 var crypto = use_embed('crypto')

 function content_hash(content) {
-  return text(crypto.blake2(content), 'h')
+  var data = content  // Hashes content with crypto.blake2 and returns the digest rendered through text(..., 'h').
+  if (!is_blob(data)) data = stone(blob(text(data)))  // non-blob input is converted text -> blob -> stone first; presumably crypto.blake2 needs a stoned blob (TODO confirm)
+  return text(crypto.blake2(data), 'h')  // 'h' presumably selects hexadecimal text output (TODO confirm)
 }

 function cache_path(hash) {
--- a/internal/engine.cm
+++ b/internal/engine.cm
@@ -35,7 +35,9 @@ var packages_path = shop_path ? shop_path + '/packages' : null
 var use_cache = {}

 function content_hash(content) {
-  return text(crypto.blake2(content), 'h')
+  var data = content  // Hashes content with crypto.blake2 and returns the digest rendered through text(..., 'h').
+  if (!is_blob(data)) data = stone(blob(text(data)))  // non-blob input is converted text -> blob -> stone first; presumably crypto.blake2 needs a stoned blob (TODO confirm)
+  return text(crypto.blake2(data), 'h')  // 'h' presumably selects hexadecimal text output (TODO confirm)
 }

 function cache_path(hash) {
--- a/internal/shop.cm
+++ b/internal/shop.cm
@@ -434,6 +434,16 @@ function detect_host_target() {

 var host_target = detect_host_target()

+// Must match build.cm NATIVE_CACHE_VERSION to detect stale native artifacts.
+def NATIVE_CACHE_VERSION = "native-v23"
+
+function native_sanitize_flags() {  // Returns the extra sanitizer flag string when the marker file exists, otherwise ''.
+  if (fd.is_file('.cell/asan_aot')) {  // relative path; NOTE(review): presumably resolved against the current working directory, confirm it matches what build.cm checks
+    return ' -fsanitize=address -fno-omit-frame-pointer'  // the exact string (leading space included) is hashed into the native cache key by try_native_mod_dylib
+  }
+  return ''
+}
+
 // Check for a native .cm dylib at the deterministic lib path
 // Returns a native descriptor {_native, _handle, _sym}, or null if no native dylib exists
 // Also checks staleness: if source has changed, the content-addressed build artifact
@@ -444,6 +454,7 @@ function try_native_mod_dylib(pkg, stem) {
  var src = null
  var host = null
  var hash = null
+  var san_flags = null
  var tc_ext = null
  var build_path = null
  var handle = null
@@ -456,7 +467,8 @@ function try_native_mod_dylib(pkg, stem) {
  if (fd.is_file(src_path)) {
    src = text(fd.slurp(src_path))
    host = detect_host_target()
-    hash = content_hash(src + '\n' + host + '\nnative')
+    san_flags = native_sanitize_flags()
+    hash = content_hash(src + '\n' + host + '\nnative\n' + NATIVE_CACHE_VERSION + '\n' + san_flags)
    tc_ext = dylib_ext
    build_path = global_shop_path + '/build/' + hash + '.' + host + tc_ext
    if (!fd.is_file(build_path)) return null
@@ -1918,4 +1930,4 @@ Shop.use_native = function(path, package_context) {
  return os.native_module_load(handle, env)
 }

-return Shop
+return Shop
--- a/mcode.cm
+++ b/mcode.cm
@@ -4,7 +4,7 @@ var mcode = function(ast) {
  // Translation tables
  var binop_map = {
    "+": "add", "-": "subtract", "*": "multiply", "/": "divide",
-    "%": "modulo", "&": "bitand", "|": "bitor", "^": "bitxor",
+    "%": "remainder", "&": "bitand", "|": "bitor", "^": "bitxor",
    "<<": "shl", ">>": "shr", ">>>": "ushr",
    "==": "eq", "===": "eq", "!=": "ne", "!==": "ne",
    "<": "lt", "<=": "le", ">": "gt", ">=": "ge",
@@ -24,13 +24,13 @@ var mcode = function(ast) {

  var binop_sym = {
    add: "+", subtract: "-", multiply: "*", divide: "/",
-    modulo: "%", pow: "**",
+    remainder: "%", pow: "**",
    lt: "<", le: "<=", gt: ">", ge: ">="
  }

  var compound_map = {
    "+=": "add", "-=": "subtract", "*=": "multiply", "/=": "divide",
-    "%=": "modulo", "&=": "bitand", "|=": "bitor", "^=": "bitxor",
+    "%=": "remainder", "&=": "bitand", "|=": "bitor", "^=": "bitxor",
    "<<=": "shl", ">>=": "shr", ">>>=": "ushr"
  }

@@ -673,7 +673,8 @@ var mcode = function(ast) {
      if (rel != null) {
        emit_relational(rel[0], rel[1], rel[2])
      } else if (op_str == "subtract" || op_str == "multiply" ||
-                 op_str == "divide" || op_str == "modulo" || op_str == "pow") {
+                 op_str == "divide" || op_str == "modulo" || op_str == "remainder" ||
+                 op_str == "pow") {
        emit_numeric_binop(op_str)
      } else {
        // Passthrough for bitwise, in, etc.
--- a/qbe_emit.cm
+++ b/qbe_emit.cm
@@ -1113,6 +1113,9 @@ var qbe_emit = function(ir, qbe, export_name) {
    var ri = 0
    var seg_num = 0
    var resume_val = 0
+    // Native calls should mirror MACH semantics: function calls are mediated
+    // by the frame dispatcher, not recursive C calls.
+    var use_invoke_trampoline = true
    var j_lbl = null
    var j_idx = null
    var jt_lbl = null
@@ -1139,6 +1142,10 @@ var qbe_emit = function(ir, qbe, export_name) {
    var floor_this_slot = 0
    var floor_arg_slot = 0
    var floor_dest_slot = 0
+    var text_frame_slot = 0
+    var text_this_slot = 0
+    var text_arg_slot = 0
+    var text_dest_slot = 0

    // Pre-scan: count invoke/tail_invoke points to assign segment numbers.
    // Must skip dead code (instructions after terminators) the same way
@@ -1187,15 +1194,41 @@ var qbe_emit = function(ir, qbe, export_name) {
        }
      }

-      if (scan_op == "invoke") {
+      // Keep invoke segment counting consistent with main-loop peephole:
+      // inline text intrinsic call sequence does not emit an invoke.
+      if (scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "text") {
+        if (si + 4 < length(instrs)) {
+          peek1 = instrs[si]
+          peek2 = instrs[si + 1]
+          peek3 = instrs[si + 2]
+          peek4 = instrs[si + 3]
+          peek5 = instrs[si + 4]
+          if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 &&
+              is_array(peek2) && peek2[0] == "null" &&
+              is_array(peek3) && peek3[0] == "setarg" &&
+              is_array(peek4) && peek4[0] == "setarg" &&
+              is_array(peek5) && peek5[0] == "invoke") {
+            text_frame_slot = peek1[1]
+            text_this_slot = peek2[1]
+            if (peek3[1] == text_frame_slot && peek3[2] == 0 && peek3[3] == text_this_slot &&
+                peek4[1] == text_frame_slot && peek4[2] == 1 &&
+                peek5[1] == text_frame_slot && peek5[2] == text_this_slot) {
+              si = si + 5
+              continue
+            }
+          }
+        }
+      }
+
+      if (use_invoke_trampoline && (scan_op == "invoke" || scan_op == "tail_invoke")) {
        invoke_count = invoke_count + 1
      }
      // Track terminators — same set as in the main loop
-      if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "tail_invoke" || scan_op == "disrupt") {
+      if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "disrupt") {
        scan_dead = true
      }
    }
-    has_invokes = invoke_count > 0
+    has_invokes = use_invoke_trampoline && invoke_count > 0

    // Function signature: (ctx, frame_ptr) → JSValue
    emit(`export function l $${name}(l %ctx, l %fp) {`)
@@ -1524,6 +1557,39 @@ var qbe_emit = function(ir, qbe, export_name) {
        }
      }

+      // Peephole: inline `text(x)` intrinsic call sequence
+      // access text; frame; null this; setarg 0 this; setarg 1 x; invoke
+      if (op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "text") {
+        if (instr_idx + 5 < length(instrs)) {
+          peek1 = instrs[instr_idx + 1]
+          peek2 = instrs[instr_idx + 2]
+          peek3 = instrs[instr_idx + 3]
+          peek4 = instrs[instr_idx + 4]
+          peek5 = instrs[instr_idx + 5]
+          if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 &&
+              is_array(peek2) && peek2[0] == "null" &&
+              is_array(peek3) && peek3[0] == "setarg" &&
+              is_array(peek4) && peek4[0] == "setarg" &&
+              is_array(peek5) && peek5[0] == "invoke") {
+            text_frame_slot = peek1[1]
+            text_this_slot = peek2[1]
+            if (peek3[1] == text_frame_slot && peek3[2] == 0 && peek3[3] == text_this_slot &&
+                peek4[1] == text_frame_slot && peek4[2] == 1 &&
+                peek5[1] == text_frame_slot && peek5[2] == text_this_slot) {
+              text_arg_slot = peek4[3]
+              text_dest_slot = peek5[2]
+              v = s_read(text_arg_slot)
+              p = fresh()
+              emit(`  %${p}_r =l call $JS_CellText(l %ctx, l ${v})`)
+              refresh_fp()
+              s_write(text_dest_slot, `%${p}_r`)
+              i = instr_idx + 6
+              continue
+            }
+          }
+        }
+      }
+
      // --- Constants ---

      if (op == "int") {
@@ -2416,42 +2482,33 @@ var qbe_emit = function(ir, qbe, export_name) {
        emit(`  storel ${lhs}, %${p}_slot`)
        continue
      }
-      if (op == "invoke") {
-        // Dispatch loop invoke: store resume info, signal, return 0
-        seg_counter = seg_counter + 1
-        seg_num = seg_counter
-        // Store (seg_num << 16 | result_slot) as tagged int in frame->address
-        resume_val = seg_num * 65536 + a2
-        // frame->address is at fp - 8, store as tagged int (n << 1)
-        emit(`  %_inv_addr${text(seg_num)} =l sub %fp, 8`)
-        emit(`  storel ${text(resume_val * 2)}, %_inv_addr${text(seg_num)}`)
-        emit(`  call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`)
-        emit("  ret 0")
-        emit(`@_seg${text(seg_num)}`)
-        // Check for exception marker in destination slot after resume.
-        // Dispatch writes JS_EXCEPTION into ret_slot on exceptional return.
-        rv = s_read(a2)
-        p = fresh()
-        emit(`  %${p} =w ceql ${rv}, ${text(qbe.js_exception)}`)
-        if (has_handler && !in_handler) {
-          emit(`  jnz %${p}, @disruption_handler, @${p}_ok`)
+      if (op == "invoke" || op == "tail_invoke") {
+        if (use_invoke_trampoline) {
+          // Signal dispatcher to call frame in slot a1 and resume at @_segN.
+          seg_counter = seg_counter + 1
+          resume_val = seg_counter * 65536 + a2
+          p = fresh()
+          emit(`  %${p}_addrp =l sub %fp, 8`)
+          // frame->address holds JS_NewInt32((seg << 16) | ret_slot), tagged.
+          emit(`  storel ${text(resume_val * 2)}, %${p}_addrp`)
+          emit(`  call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`)
+          emit(`  ret ${text(qbe.js_null)}`)
+          emit(`@_seg${text(seg_counter)}`)
+          // Dispatcher writes JS_EXCEPTION into ret slot on error; branch here.
+          v = s_read(a2)
+          emit(`  %${p}_exc =w ceql ${v}, ${text(qbe.js_exception)}`)
+          if (has_handler && !in_handler) {
+            emit(`  jnz %${p}_exc, @disruption_handler, @${p}_ok`)
+          } else {
+            needs_exc_ret = true
+            emit(`  jnz %${p}_exc, @_exc_ret, @${p}_ok`)
+          }
+          emit(`@${p}_ok`)
        } else {
-          needs_exc_ret = true
-          emit(`  jnz %${p}, @_exc_ret, @${p}_ok`)
+          // Direct helper invoke path (disabled by default).
+          emit(`  %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`)
+          emit_exc_check()
        }
-        emit(`@${p}_ok`)
-        last_was_term = false
-        continue
-      }
-      if (op == "tail_invoke") {
-        // Tail call: hand control to dispatch loop and do not resume this segment.
-        // Use 0xFFFF as ret_slot (no result writeback into current frame).
-        p = fresh()
-        emit(`  %${p}_addr =l sub %fp, 8`)
-        emit(`  storel ${text(65535 * 2)}, %${p}_addr`)
-        emit(`  call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`)
-        emit("  ret 0")
-        last_was_term = true
        continue
      }
      if (op == "goframe") {
@@ -2460,13 +2517,30 @@ var qbe_emit = function(ir, qbe, export_name) {
        continue
      }
      if (op == "goinvoke") {
-        // Dispatch loop tail call: signal tail call and return 0
-        // Use 0xFFFF as ret_slot (no result to store — it's a tail call)
-        p = fresh()
-        emit(`  %${p}_addr =l sub %fp, 8`)
-        emit(`  storel ${text(65535 * 2)}, %${p}_addr`)
-        emit(`  call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`)
-        emit("  ret 0")
+        if (use_invoke_trampoline) {
+          // Tail call via dispatcher: no resume in this frame.
+          emit(`  call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`)
+          emit(`  ret ${text(qbe.js_null)}`)
+        } else {
+          // Direct helper goinvoke path (disabled by default).
+          v = s_read(a1)
+          p = fresh()
+          emit(`  %${p}_r =l call $cell_rt_goinvoke(l %ctx, l ${v})`)
+          emit(`  %${p}_exc =w ceql %${p}_r, ${text(qbe.js_exception)}`)
+          if (has_handler && !in_handler) {
+            emit(`  jnz %${p}_exc, @${p}_exc, @${p}_ok`)
+            emit(`@${p}_exc`)
+            emit(`  %fp =l call $cell_rt_refresh_fp(l %ctx)`)
+            emit(`  jmp @disruption_handler`)
+            emit(`@${p}_ok`)
+            emit(`  ret %${p}_r`)
+          } else {
+            needs_exc_ret = true
+            emit(`  jnz %${p}_exc, @_exc_ret, @${p}_ok`)
+            emit(`@${p}_ok`)
+            emit(`  ret %${p}_r`)
+          }
+        }
        last_was_term = true
        continue
      }
--- a/source/cell.c
+++ b/source/cell.c
@@ -37,6 +37,7 @@ static char *compute_blake2_hex(const char *data, size_t size) {
  uint8_t hash[32];
  crypto_blake2b(hash, 32, (const uint8_t *)data, size);
  char *hex = malloc(65);
+  if (!hex) return NULL;
  for (int i = 0; i < 32; i++)
    snprintf(hex + i * 2, 3, "%02x", hash[i]);
  return hex;
@@ -64,6 +65,7 @@ static int write_cache_file(const char *path, const uint8_t *data, size_t size)
 // Returns heap-allocated binary data and sets *out_size, or NULL on failure
 static char *load_or_cache_bootstrap(const char *mcode_data, size_t mcode_size, size_t *out_size) {
  char *hex = compute_blake2_hex(mcode_data, mcode_size);
+  if (!hex) return NULL;
  char *cpath = build_cache_path(hex);
  free(hex);

@@ -222,6 +224,7 @@ static char *try_engine_cache(size_t *out_size) {

  char *hex = compute_blake2_hex(src, src_size);
  free(src);
+  if (!hex) return NULL;
  char *cpath = build_cache_path(hex);
  if (!cpath) { free(hex); return NULL; }
  free(hex);
--- a/source/qbe_helpers.c
+++ b/source/qbe_helpers.c
@@ -583,11 +583,13 @@ void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth,
 #define AOT_GC_REF_CHUNK_SIZE 1024
 typedef struct AOTGCRefChunk {
  JSGCRef refs[AOT_GC_REF_CHUNK_SIZE];
+  uint8_t inited[AOT_GC_REF_CHUNK_SIZE];
 } AOTGCRefChunk;

 static CELL_THREAD_LOCAL AOTGCRefChunk **g_aot_gc_ref_chunks = NULL;
 static CELL_THREAD_LOCAL int g_aot_gc_ref_chunk_count = 0;
 static CELL_THREAD_LOCAL int g_aot_depth = 0;
+static CELL_THREAD_LOCAL JSContext *g_aot_gc_ref_ctx = NULL;

 int cell_rt_native_active(void) {
  return g_aot_depth > 0;
@@ -624,14 +626,50 @@ static inline JSGCRef *aot_gc_ref_at(int depth_index) {
  return &g_aot_gc_ref_chunks[chunk_index]->refs[slot_index];
 }

+static inline uint8_t *aot_gc_ref_inited_at(int depth_index) { /* Returns the address of the "registered" flag that sits beside the JSGCRef for depth_index. */
+  int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; /* which chunk holds this depth */
+  int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; /* position inside that chunk */
+  return &g_aot_gc_ref_chunks[chunk_index]->inited[slot_index]; /* no bounds check here: the visible callers run ensure_aot_gc_ref_slot first */
+}
+
+/* GC refs are owned by a specific JSContext. If context changes on this thread,
+   unregister previous refs and reset per-slot initialization state. */
+static void aot_gc_ref_reset_ctx(JSContext *ctx) {
+  if (g_aot_gc_ref_ctx == ctx) /* fast path: same context as the previous call on this thread */
+    return;
+  if (g_aot_gc_ref_ctx) { /* NULL is the initial value, i.e. no context has been recorded yet */
+    for (int ci = 0; ci < g_aot_gc_ref_chunk_count; ci++) {
+      AOTGCRefChunk *chunk = g_aot_gc_ref_chunks[ci];
+      for (int si = 0; si < AOT_GC_REF_CHUNK_SIZE; si++) {
+        if (chunk->inited[si]) { /* only slots flagged by aot_gc_ref_activate are unregistered */
+          JS_DeleteGCRef(g_aot_gc_ref_ctx, &chunk->refs[si]); /* deleted from the OLD context, not from ctx */
+          chunk->inited[si] = 0;
+          chunk->refs[si].val = JS_NULL;
+        }
+      }
+    }
+  }
+  g_aot_gc_ref_ctx = ctx; /* NOTE(review): g_aot_depth is not adjusted here; confirm the context cannot change while AOT frames are still active */
+}
+
+static inline void aot_gc_ref_activate(JSContext *ctx, int depth_index) { /* Registers the slot's JSGCRef with ctx on first use; a no-op while the slot's flag stays set. */
+  JSGCRef *ref = aot_gc_ref_at(depth_index);
+  uint8_t *inited = aot_gc_ref_inited_at(depth_index);
+  if (!*inited) { /* NOTE(review): relies on inited[] starting out zero; confirm chunks are zero-initialised where they are allocated */
+    JS_AddGCRef(ctx, ref);
+    *inited = 1; /* flag is only cleared again by aot_gc_ref_reset_ctx; cell_rt_leave_frame just resets val */
+  }
+}
+
 JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) {
+  aot_gc_ref_reset_ctx(ctx);
  if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) {
    return NULL;
  }
  JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots);
  if (!frame) return NULL;
+  aot_gc_ref_activate(ctx, g_aot_depth);
  JSGCRef *ref = aot_gc_ref_at(g_aot_depth);
-  JS_AddGCRef(ctx, ref);
  ref->val = JS_MKPTR(frame);
  g_aot_depth++;
  return (JSValue *)frame->slots;
@@ -639,10 +677,11 @@ JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) {

 /* Push an already-allocated frame onto the active AOT frame stack. */
 static int cell_rt_push_existing_frame(JSContext *ctx, JSValue frame_val) {
+  aot_gc_ref_reset_ctx(ctx);
  if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth))
    return 0;
+  aot_gc_ref_activate(ctx, g_aot_depth);
  JSGCRef *ref = aot_gc_ref_at(g_aot_depth);
-  JS_AddGCRef(ctx, ref);
  ref->val = frame_val;
  g_aot_depth++;
  return 1;
@@ -682,12 +721,13 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) {
 }

 void cell_rt_leave_frame(JSContext *ctx) {
+  (void)ctx; /* parameter kept for the existing signature; unused now that the GC ref is no longer deleted here */
  if (g_aot_depth <= 0) {
    fprintf(stderr, "[BUG] cell_rt_leave_frame underflow\n");
    abort();
  }
  g_aot_depth--;
-  JS_DeleteGCRef(ctx, aot_gc_ref_at(g_aot_depth));
+  aot_gc_ref_at(g_aot_depth)->val = JS_NULL; /* the slot stays registered for reuse; clearing val drops its reference to the popped frame */
 }

 /* --- Function creation and calling --- */
--- a/source/quickjs-internal.h
+++ b/source/quickjs-internal.h
@@ -1426,8 +1426,6 @@ static JSValue js_cell_splat (JSContext *ctx, JSValue this_val, int argc, JSValu
 static JSValue js_cell_meme (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_cell_fn_apply (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_cell_call (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_modulo (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_neg (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_cell_not (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 JSValue js_cell_text_lower (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 JSValue js_cell_text_upper (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
@@ -1438,17 +1436,6 @@ static JSValue js_cell_text_search (JSContext *ctx, JSValue this_val, int argc,
 static JSValue js_cell_text_extract (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 JSValue js_cell_character (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_cell_number (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_abs (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_sign (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_floor (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_ceiling (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_round (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_trunc (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_whole (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_fraction (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_min (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_max (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
-static JSValue js_cell_number_remainder (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_cell_object (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_cell_text_format (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
 static JSValue js_print (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
--- a/source/runtime.c
+++ b/source/runtime.c
@@ -10514,15 +10514,46 @@ JSValue JS_CellCall (JSContext *ctx, JSValue fn, JSValue this_val, JSValue args)
  return js_cell_call (ctx, JS_NULL, argc, argv);
 }

+static int js_cell_read_number_strict (JSValue val, double *out) { /* Stores val in *out as a double only if val is already int- or float-tagged; returns 0 on success, -1 otherwise. */
+  uint32_t tag = JS_VALUE_GET_TAG (val);
+  if (tag == JS_TAG_INT) { /* integer-tagged value: widen to double */
+    *out = (double)JS_VALUE_GET_INT (val);
+    return 0;
+  }
+  if (JS_TAG_IS_FLOAT64 (tag)) { /* float-tagged value: copied unchanged; callers that care test isnan themselves */
+    *out = JS_VALUE_GET_FLOAT64 (val);
+    return 0;
+  }
+  return -1; /* every other tag is rejected and *out is left untouched */
+}
+
+static JSValue js_cell_number_from_double (JSContext *ctx, double d) { /* Boxes d as an int32 value when it round-trips exactly through int32_t, otherwise as a float64. */
+  if (d >= INT32_MIN && d <= INT32_MAX) { /* range guard keeps the cast below well-defined; NaN fails both comparisons */
+    int32_t i = (int32_t)d;
+    if ((double)i == d) /* exact round-trip means d is integral; -0.0 compares equal to 0 and therefore also takes this path */
+      return JS_NewInt32 (ctx, i);
+  }
+  return JS_NewFloat64 (ctx, d); /* fractional, out-of-range, infinite or NaN input */
+}
+
 /* C API: modulo(a, b) - modulo operation */
 JSValue JS_CellModulo (JSContext *ctx, JSValue a, JSValue b) {
-  JSValue argv[2] = { a, b };
-  return js_cell_modulo (ctx, JS_NULL, 2, argv);
+  double dividend, divisor;
+  if (js_cell_read_number_strict (a, &dividend) < 0) return JS_NULL; /* non-number operand gives null */
+  if (js_cell_read_number_strict (b, &divisor) < 0) return JS_NULL;
+  if (isnan (dividend) || isnan (divisor)) return JS_NULL; /* NaN operand gives null */
+  if (divisor == 0.0) return JS_NULL; /* modulo by zero gives null */
+  if (dividend == 0.0) return JS_NewFloat64 (ctx, 0.0); /* zero dividend is answered directly as float 0.0, bypassing the int32 narrowing used below */
+  return js_cell_number_from_double (ctx,
+    dividend - (divisor * floor (dividend / divisor))); /* floored form: for finite operands a non-zero result takes the sign of the divisor */
 }

 /* C API: neg(val) - negate number */
 JSValue JS_CellNeg (JSContext *ctx, JSValue val) {
-  return js_cell_neg (ctx, JS_NULL, 1, &val);
+  double d;
+  if (js_cell_read_number_strict (val, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  if (isnan (d)) return JS_NULL; /* NaN gives null */
+  return js_cell_number_from_double (ctx, -d); /* negated as a double, so INT32_MIN cannot overflow and comes back as a float64; negating 0 yields int 0 after narrowing */
 }

 /* C API: not(val) - logical not */
@@ -10647,60 +10678,86 @@ JSValue JS_CellNumber (JSContext *ctx, JSValue val) {

 /* C API: abs(num) - absolute value */
 JSValue JS_CellAbs (JSContext *ctx, JSValue num) {
-  return js_cell_number_abs (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, fabs (d)); /* computed as a double, so abs of INT32_MIN comes back as a float64; NaN is not filtered here */
 }

 /* C API: sign(num) - sign of number (-1, 0, 1) */
 JSValue JS_CellSign (JSContext *ctx, JSValue num) {
-  return js_cell_number_sign (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  if (d < 0) return JS_NewInt32 (ctx, -1);
+  if (d > 0) return JS_NewInt32 (ctx, 1);
+  return JS_NewInt32 (ctx, 0); /* reached for +0, -0 and also NaN, since both comparisons above are false for NaN */
 }

 /* C API: floor(num) - floor */
 JSValue JS_CellFloor (JSContext *ctx, JSValue num) {
-  return js_cell_number_floor (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, floor (d)); /* rounds toward negative infinity; handles the single-number form only */
 }

 /* C API: ceiling(num) - ceiling */
 JSValue JS_CellCeiling (JSContext *ctx, JSValue num) {
-  return js_cell_number_ceiling (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, ceil (d)); /* rounds toward positive infinity */
 }

 /* C API: round(num) - round to nearest integer */
 JSValue JS_CellRound (JSContext *ctx, JSValue num) {
-  return js_cell_number_round (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, round (d)); /* C round(): halfway cases go away from zero (2.5 gives 3, -2.5 gives -3) */
 }

 /* C API: trunc(num) - truncate towards zero */
 JSValue JS_CellTrunc (JSContext *ctx, JSValue num) {
-  return js_cell_number_trunc (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, trunc (d)); /* drops the fractional part; in-range results are narrowed to int32 */
 }

 /* C API: whole(num) - integer part */
 JSValue JS_CellWhole (JSContext *ctx, JSValue num) {
-  return js_cell_number_whole (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, trunc (d)); /* same computation as JS_CellTrunc */
 }

 /* C API: fraction(num) - fractional part */
 JSValue JS_CellFraction (JSContext *ctx, JSValue num) {
-  return js_cell_number_fraction (ctx, JS_NULL, 1, &num);
+  double d;
+  if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; /* non-number operand gives null */
+  return js_cell_number_from_double (ctx, d - trunc (d)); /* result keeps the sign of d (-1.5 gives -0.5); an integral d gives int 0 */
 }

 /* C API: min(a, b) - minimum of two numbers */
 JSValue JS_CellMin (JSContext *ctx, JSValue a, JSValue b) {
-  JSValue argv[2] = { a, b };
-  return js_cell_number_min (ctx, JS_NULL, 2, argv);
+  double da, db;
+  if (js_cell_read_number_strict (a, &da) < 0) return JS_NULL; /* non-number operand gives null */
+  if (js_cell_read_number_strict (b, &db) < 0) return JS_NULL;
+  return js_cell_number_from_double (ctx, da < db ? da : db); /* NOTE(review): a comparison with NaN is false, so b's value is returned whenever either operand is NaN; confirm this is intended */
 }

 /* C API: max(a, b) - maximum of two numbers */
 JSValue JS_CellMax (JSContext *ctx, JSValue a, JSValue b) {
-  JSValue argv[2] = { a, b };
-  return js_cell_number_max (ctx, JS_NULL, 2, argv);
+  double da, db;
+  if (js_cell_read_number_strict (a, &da) < 0) return JS_NULL; /* non-number operand gives null */
+  if (js_cell_read_number_strict (b, &db) < 0) return JS_NULL;
+  return js_cell_number_from_double (ctx, da > db ? da : db); /* NOTE(review): a comparison with NaN is false, so b's value is returned whenever either operand is NaN; confirm this is intended */
 }

 /* C API: remainder(a, b) - remainder after division */
 JSValue JS_CellRemainder (JSContext *ctx, JSValue a, JSValue b) {
-  JSValue argv[2] = { a, b };
-  return js_cell_number_remainder (ctx, JS_NULL, 2, argv);
+  double dividend, divisor;
+  if (js_cell_read_number_strict (a, &dividend) < 0) return JS_NULL; /* non-number operand gives null */
+  if (js_cell_read_number_strict (b, &divisor) < 0) return JS_NULL;
+  if (divisor == 0.0) return JS_NULL; /* division by zero gives null; there is no isnan test here, unlike JS_CellModulo */
+  return js_cell_number_from_double (ctx,
+    dividend - (trunc (dividend / divisor) * divisor)); /* truncated form (a non-zero result takes the sign of the dividend); same expression the constant folder in fold.cm uses for the % operator */
 }

 /* Object functions */
@@ -11374,7 +11431,7 @@ static void JS_AddIntrinsicBaseObjects (JSContext *ctx) {
    js_set_global_cfunc(ctx, "filter", js_cell_array_filter, 2);
    js_set_global_cfunc(ctx, "sort", js_cell_array_sort, 2);

-    /* Number utility functions */
+    /* Number intrinsics: direct calls lower to mcode; globals remain for first-class use. */
    js_set_global_cfunc(ctx, "whole", js_cell_number_whole, 1);
    js_set_global_cfunc(ctx, "fraction", js_cell_number_fraction, 1);
    js_set_global_cfunc(ctx, "floor", js_cell_number_floor, 2);