aot compile vm_suite

2026-02-17 03:33:21 -06:00
parent c9dad91ea1
commit 9e42a28d55
7 changed files with 684 additions and 52 deletions
--- a/build.cm
+++ b/build.cm
@@ -576,6 +576,93 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
  return dylib_path
 }

+// Compile already-optimized mcode IR to a native shared library via QBE and return the library path.
+// opts is optional: { target, buildtype, pkg }. Use this when the caller already has the IR
+// (avoids calling mcode twice and hitting module-level state pollution). Disrupts if a tool step fails.
+Build.compile_native_ir = function(optimized, src_path, opts) {
+  var _target = (opts && opts.target) || Build.detect_host_target()
+  var _buildtype = (opts && opts.buildtype) || 'release' // NOTE(review): never read below — confirm whether it should affect the build
+  var pkg = opts && opts.pkg
+  var qbe_rt_path = null
+  var native_stem = null
+  var native_install_dir = null
+  var native_install_path = null
+
+  var tc = toolchains[_target]
+  var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so')
+  var cc = tc.c
+
+  var src = text(fd.slurp(src_path))
+  var hash = content_hash(src + '\n' + _target + '\nnative') // cache key: source text + target
+  var build_dir = get_build_dir()
+  ensure_dir(build_dir)
+
+  var dylib_path = build_dir + '/' + hash + '.' + _target + dylib_ext
+  if (fd.is_file(dylib_path)) // cache hit: checked before any IL is emitted so no work is thrown away
+    return dylib_path // NOTE(review): the pkg install step at the bottom is skipped on this path
+
+  var qbe_macros = use('qbe')
+  var qbe_emit = use('qbe_emit')
+
+  var sym_name = null
+  if (pkg) {
+    sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
+  }
+  var il = qbe_emit(optimized, qbe_macros, sym_name)
+  il = qbe_insert_dead_labels(il)
+
+  var tmp = '/tmp/cell_native_' + hash // intermediate files share this stem
+  var ssa_path = tmp + '.ssa'
+  var s_path = tmp + '.s'
+  var o_path = tmp + '.o'
+  var rt_o_path = '/tmp/cell_qbe_rt.' + _target + '.o' // keyed by target: cc differs per toolchain, so the object must not be shared
+
+  fd.slurpwrite(ssa_path, stone(blob(il)))
+
+  var rc = os.system('qbe -o ' + s_path + ' ' + ssa_path) // QBE IL -> assembly
+  if (rc != 0) {
+    print('QBE compilation failed for: ' + src_path); disrupt
+  }
+
+  rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path) // assembly -> object
+  if (rc != 0) {
+    print('Assembly failed for: ' + src_path); disrupt
+  }
+
+  if (!fd.is_file(rt_o_path)) { // runtime stubs are compiled once and reused while the object file exists
+    qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
+    rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
+    if (rc != 0) {
+      print('QBE runtime stubs compilation failed'); disrupt
+    }
+  }
+
+  var link_cmd = cc + ' -shared -fPIC'
+  if (tc.system == 'darwin') {
+    link_cmd = link_cmd + ' -undefined dynamic_lookup' // leave unresolved symbols for load time
+  } else if (tc.system == 'linux') {
+    link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
+  }
+  link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
+
+  rc = os.system(link_cmd)
+  if (rc != 0) {
+    print('Linking native dylib failed for: ' + src_path); disrupt
+  }
+
+  log.console('Built native: ' + fd.basename(dylib_path))
+
+  if (pkg) { // also copy the library into the package's lib directory
+    native_stem = fd.basename(src_path)
+    native_install_dir = shop.get_lib_dir() + '/' + shop.lib_name_for_package(pkg)
+    ensure_dir(native_install_dir)
+    native_install_path = native_install_dir + '/' + native_stem + dylib_ext
+    fd.slurpwrite(native_install_path, fd.slurp(dylib_path))
+  }
+
+  return dylib_path
+}
+
 // ============================================================================
 // Module table generation (for static builds)
 // ============================================================================
--- a/compare_aot.ce
+++ b/compare_aot.ce
@@ -1,7 +1,7 @@
-// compare_aot.ce — compile a .cm module via both paths and compare results
+// compare_aot.ce — compile a .ce/.cm file via both paths and compare results
 //
 // Usage:
-//   cell --dev compare_aot.ce <module.cm>
+//   cell --dev compare_aot.ce <file.ce|file.cm>

 var build = use('build')
 var fd_mod = use('fd')
@@ -9,17 +9,20 @@ var os = use('os')
 var json = use('json')

 var show = function(v) {
+  if (v == null) return "null" // null is rendered directly instead of going through json.encode
  return json.encode(v)
 }

 if (length(args) < 1) {
-  print('usage: cell --dev compare_aot.ce <module.cm>')
+  print('usage: cell --dev compare_aot.ce <file>')
  return
 }

 var file = args[0]
 if (!fd_mod.is_file(file)) {
-  if (!ends_with(file, '.cm') && fd_mod.is_file(file + '.cm'))
+  if (!ends_with(file, '.ce') && fd_mod.is_file(file + '.ce'))
+    file = file + '.ce'
+  else if (!ends_with(file, '.cm') && fd_mod.is_file(file + '.cm'))
    file = file + '.cm'
  else {
    print('file not found: ' + file)
@@ -43,50 +46,68 @@ var folded = fold(ast)
 var compiled = mcode_mod(folded)
 var optimized = streamline_mod(compiled)

-// --- Interpreted (mach VM) ---
-print('--- interpreted ---')
-var mcode_json = json.encode(optimized)
-var mach_blob = mach_compile_mcode_bin(abs, mcode_json)
-var result_interp = mach_load(mach_blob, stone({}))
-print('result: ' + show(result_interp))
-
-// --- Native (AOT via QBE) ---
-print('\n--- native ---')
-var dylib_path = build.compile_native(abs, null, null, null)
-print('dylib: ' + dylib_path)
-
-var handle = os.dylib_open(dylib_path)
-if (!handle) {
-  print('failed to open dylib')
-  return
-}
-
-// Build env with runtime functions. Must include starts_with etc. because
-// the GC can lose global object properties after compaction.
+// Shared env for both paths — only non-intrinsic runtime functions.
+// Intrinsics (starts_with, ends_with, logical, some, every, etc.) live on
+// the stoned global and are found via GETINTRINSIC/cell_rt_get_intrinsic.
 var env = stone({
-  logical: logical,
-  some: some,
-  every: every,
-  starts_with: starts_with,
-  ends_with: ends_with,
  log: log,
  fallback: fallback,
  parallel: parallel,
  race: race,
-  sequence: sequence
+  sequence: sequence,
+  use // shorthand property: makes use available to the loaded module in both runs
 })

-var result_native = os.native_module_load(handle, env)
-print('result: ' + show(result_native))
+// --- Interpreted (mach VM) --- runs the optimized IR on the mach VM and records the result
+var result_interp = null
+var interp_ok = false
+var run_interp = function() {
+  print('--- interpreted ---')
+  var mcode_json = json.encode(optimized)
+  var mach_blob = mach_compile_mcode_bin(abs, mcode_json)
+  result_interp = mach_load(mach_blob, env) // same env object the native run receives
+  interp_ok = true
+  print('result: ' + show(result_interp))
+} disruption {
+  interp_ok = true // NOTE(review): also true on disruption, so interp_ok can never be false after this call — confirm intent
+  print('(disruption escaped from interpreted run)')
+}
+run_interp()
+
+// --- Native (AOT via QBE) --- builds a dylib from the same optimized IR, loads it and records the result
+var result_native = null
+var native_ok = false
+var run_native = function() {
+  print('\n--- native ---')
+  var dylib_path = build.compile_native_ir(optimized, abs, null) // reuses the IR built above instead of recompiling the source
+  print('dylib: ' + dylib_path)
+  var handle = os.dylib_open(dylib_path)
+  if (!handle) {
+    print('failed to open dylib')
+    return // native_ok stays false: the only path that reports 'native run failed'
+  }
+  result_native = os.native_module_load(handle, env)
+  native_ok = true
+  print('result: ' + show(result_native))
+} disruption {
+  native_ok = true // NOTE(review): a disruption raised by the build step also lands here and is reported as ok with a null result — confirm intent
+  print('(disruption escaped from native run)')
+}
+run_native()

 // --- Comparison ---
 print('\n--- comparison ---')
 var s_interp = show(result_interp)
 var s_native = show(result_native)
-if (s_interp == s_native) {
-  print('MATCH')
+if (interp_ok && native_ok) { // compare the rendered results only when both runs set their ok flag
+  if (s_interp == s_native) {
+    print('MATCH')
+  } else {
+    print('MISMATCH')
+    print('  interp: ' + s_interp)
+    print('  native: ' + s_native)
+  }
 } else {
-  print('MISMATCH')
-  print('  interp: ' + s_interp)
-  print('  native: ' + s_native)
+  if (!interp_ok) print('interpreted run failed') // NOTE(review): appears unreachable — run_interp sets interp_ok on both of its paths
+  if (!native_ok) print('native run failed')
 }
--- a/qbe_emit.cm
+++ b/qbe_emit.cm
@@ -101,6 +101,7 @@ var qbe_emit = function(ir, qbe, export_name) {
    var chk = null
    var pat_label = null
    var flg_label = null
+    var in_handler = false

    // Function signature: (ctx, frame_ptr) → JSValue
    emit(`export function l $${name}(l %ctx, l %fp) {`)
@@ -127,7 +128,17 @@ var qbe_emit = function(ir, qbe, export_name) {
    }

    var refresh_fp = function() {
-      emit(`  %fp =l call $cell_rt_refresh_fp(l %ctx)`)
+      emit(`  %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) // checked variant returns NULL when an exception is pending
+      var exc = fresh() // fresh temp name, also used as the prefix of the two labels below
+      emit(`  %${exc} =w ceql %fp, 0`) // nonzero when the refreshed fp is 0
+      if (has_handler && !in_handler) {
+        emit(`  jnz %${exc}, @disruption_handler, @${exc}_ok`) // before the handler label: route the failure to the handler
+      } else {
+        emit(`  jnz %${exc}, @${exc}_exc, @${exc}_ok`) // no handler, or already inside it: return instead of jumping
+        emit(`@${exc}_exc`)
+        emit(`  ret 15`) // same value the disrupt path returns; presumably the exception sentinel — confirm
+      }
+      emit(`@${exc}_ok`)
    }

    // Walk instructions
@@ -143,7 +154,10 @@ var qbe_emit = function(ir, qbe, export_name) {
          emit("  jmp @disruption_handler")
        }
        emit("@disruption_handler")
+        emit("  call $cell_rt_clear_exception(l %ctx)")
+        emit(`  %fp =l call $cell_rt_refresh_fp(l %ctx)`)
        last_was_term = false
+        in_handler = true
      }
      i = i + 1

@@ -949,7 +963,7 @@ var qbe_emit = function(ir, qbe, export_name) {
      }
      if (op == "disrupt") {
        emit(`  call $cell_rt_disrupt(l %ctx)`)
-        if (has_handler) {
+        if (has_handler && !in_handler) {
          emit("  jmp @disruption_handler")
        } else {
          emit(`  ret 15`)
@@ -1012,11 +1026,7 @@ var qbe_emit = function(ir, qbe, export_name) {
    }
    emit("@disrupt")
    emit(`  call $cell_rt_disrupt(l %ctx)`)
-    if (has_handler) {
-      emit("  jmp @disruption_handler")
-    } else {
-      emit(`  ret 15`)
-    }
+    emit(`  ret 15`)

    emit("}")
    emit("")
--- a/source/mach.c
+++ b/source/mach.c
@@ -2084,6 +2084,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
          env = fn->u.reg.env_record;
          pc = code->disruption_pc;
          ctx->disruption_reported = FALSE;
+          ctx->current_exception = JS_NULL;
          break;
        }
        if (JS_IsNull(frame->caller)) {
@@ -2123,7 +2124,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
            }
            ctx->disruption_reported = TRUE;
          }
-          result = JS_Throw(ctx, JS_NULL);
+          result = JS_Throw(ctx, JS_TRUE);
          frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
          goto done;
        }
--- a/source/qbe_helpers.c
+++ b/source/qbe_helpers.c
@@ -282,12 +282,25 @@ void cell_rt_set_native_env(JSContext *ctx, JSValue env) {
 }

 JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) {
-  /* Check native env first (runtime-provided functions like starts_with) */
+  /* Check native env first (runtime-provided functions like log) */
  if (g_has_native_env) {
    JSValue v = JS_GetPropertyStr(ctx, g_native_env_ref.val, name);
-    if (!JS_IsNull(v)) return v;
+    if (!JS_IsNull(v)) /* a null lookup result falls through to the global object */
+      return v;
  }
-  return JS_GetPropertyStr(ctx, ctx->global_obj, name);
+  /* Linear scan of global object — avoids hash mismatch issues with
+     stoned records whose keys may be in cold storage */
+  JSValue gobj = ctx->global_obj;
+  if (JS_IsRecord(gobj)) {
+    JSRecord *rec = (JSRecord *)chase(gobj);
+    uint64_t mask = objhdr_cap56(rec->mist_hdr);
+    for (uint64_t i = 1; i <= mask; i++) { /* visits slots 1..mask inclusive */
+      if (js_key_equal_str(rec->slots[i].key, name))
+        return rec->slots[i].val;
+    }
+  }
+  JS_ThrowReferenceError(ctx, "'%s' is not defined", name); /* name is in neither the native env nor the global record */
+  return JS_EXCEPTION;
 }

 /* --- Closure access ---
@@ -349,8 +362,36 @@ JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) {

 JSValue *cell_rt_refresh_fp(JSContext *ctx) {
  (void)ctx;
-  JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(
-    g_aot_gc_refs[g_aot_depth - 1].val);
+  if (g_aot_depth <= 0) { /* no AOT frame recorded: abort rather than index below the array */
+    fprintf(stderr, "[BUG] cell_rt_refresh_fp: g_aot_depth=%d\n", g_aot_depth);
+    abort();
+  }
+  JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; /* entry at the current depth */
+  JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val);
+  if (!frame) {
+    fprintf(stderr, "[BUG] cell_rt_refresh_fp: frame is NULL at depth=%d val=%lld\n",
+            g_aot_depth, (long long)val);
+    abort();
+  }
+  return (JSValue *)frame->slots; /* slot array of that frame, used as the new fp */
+}
+
+/* Combined refresh + exception check in a single call.
+   Returns the refreshed fp, or NULL if there is a pending exception.
+   This avoids QBE register-allocation issues from two consecutive calls. */
+JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) {
+  if (JS_HasException(ctx)) /* checked first: the frame is not looked up at all in this case */
+    return NULL;
+  if (g_aot_depth <= 0) { /* no AOT frame recorded: abort rather than index below the array */
+    fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: g_aot_depth=%d\n", g_aot_depth);
+    abort();
+  }
+  JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; /* entry at the current depth */
+  JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val);
+  if (!frame) {
+    fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: frame is NULL\n");
+    abort();
+  }
  return (JSValue *)frame->slots;
 }

@@ -464,12 +505,13 @@ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) {
 }

 void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) {
-  if (frame_val == JS_EXCEPTION) return;
+  if (frame_val == JS_EXCEPTION || frame_val == JS_NULL) return; /* no frame to write into: the store is dropped */
  JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
  fr->slots[idx] = val;
 }

 JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
+  if (frame_val == JS_EXCEPTION) return JS_EXCEPTION;
  JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
  int nr_slots = (int)objhdr_cap56(fr->header);
  int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
@@ -580,6 +622,15 @@ JSValue cell_rt_or(JSContext *ctx, JSValue left, JSValue right) {
  return JS_ToBool(ctx, left) ? left : right;
 }

+/* --- Exception checking ---
+   QBE-generated code branches to its disruption handler when a runtime call
+   leaves an exception pending; the handler calls this first to discard it. */
+
+void cell_rt_clear_exception(JSContext *ctx) {
+  if (JS_HasException(ctx))
+    JS_GetException(ctx); /* result ignored; presumably fetching it resets the pending state — confirm */
+}
+
 /* --- Disruption --- */

 void cell_rt_disrupt(JSContext *ctx) {
@@ -630,6 +681,10 @@ JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env)
    return JS_ThrowTypeError(ctx, "frame allocation failed");
  }

+  /* Clear any stale exception left by a previous interpreted run */
+  if (JS_HasException(ctx))
+    JS_GetException(ctx);
+
  JSValue result = fn(ctx, fp);
  cell_rt_leave_frame(ctx);  /* safe — closures have independent GC refs */
  g_current_dl_handle = prev_handle;
--- a/source/runtime.c
+++ b/source/runtime.c
@@ -3076,7 +3076,7 @@ JSValue JS_ThrowError2 (JSContext *ctx, JSErrorEnum error_num, const char *fmt,
  if (add_backtrace) {
    print_backtrace (ctx, NULL, 0, 0);
  }
-  return JS_Throw (ctx, JS_NULL);
+  return JS_Throw (ctx, JS_TRUE);
 }

 static JSValue JS_ThrowError (JSContext *ctx, JSErrorEnum error_num, const char *fmt, va_list ap) {
--- a/streamline.cm
+++ b/streamline.cm
@@ -1020,6 +1020,461 @@ var streamline = function(ir, log) {
    return null
  }

+  // =========================================================
+  // Pass: compress_slots — linear-scan register allocation
+  // Reuses slots with non-overlapping live ranges to reduce
+  // nr_slots. Mirrors mcode_compress_regs from mach.c.
+  // Works across all functions for captured-slot tracking.
+  // =========================================================
+
+  // Operand positions that hold slot references, for ops that must not use the generic rule in get_slot_refs
+  var slot_idx_special = {
+    get: [1], put: [1], // operands 2 and 3 (ancestor slot, level) are rewritten separately by compress_slots
+    access: [1], int: [1], function: [1], regexp: [1],
+    true: [1], false: [1], null: [1],
+    record: [1], array: [1],
+    invoke: [1, 2], tail_invoke: [1, 2],
+    goinvoke: [1],
+    setarg: [1, 3],
+    frame: [1, 2], goframe: [1, 2],
+    jump: [], disrupt: [], // no slot operands
+    jump_true: [1], jump_false: [1], jump_not_null: [1], // operand 2 is the label
+    return: [1]
+  }
+
+  var get_slot_refs = function(instr) { // returns the operand positions of instr that are slot references
+    var special = slot_idx_special[instr[0]]
+    var result = null
+    var j = 0
+    var limit = 0
+    if (special != null) return special // the shared table entry itself is returned, not a copy
+    result = []
+    limit = length(instr) - 2 // the last two entries are never treated as operands (presumably position info — confirm)
+    j = 1
+    while (j < limit) {
+      if (is_number(instr[j])) result[] = j // generic rule: every numeric operand counts as a slot
+      j = j + 1
+    }
+    return result
+  }
+
+  var compress_one_fn = function(func, captured_slots) { // renumbers func's slots in place; returns the old->new remap array, or null if func was left untouched
+    var instructions = func.instructions
+    var nr_slots = func.nr_slots
+    var nr_args = func.nr_args != null ? func.nr_args : 0
+    var n = 0
+    var pinned = 0
+    var first_ref = null
+    var last_ref = null
+    var i = 0
+    var j = 0
+    var k = 0
+    var s = 0
+    var instr = null
+    var refs = null
+    var op = null
+    var target = null
+    var tpos = 0
+    var changed = false
+    var label_map = null
+    var live_slots = null
+    var live_first = null
+    var live_last = null
+    var cnt = 0
+    var key_s = 0
+    var key_f = 0
+    var key_l = 0
+    var remap = null
+    var pool = null
+    var next_phys = 0
+    var active_phys = null
+    var active_last = null
+    var phys = 0
+    var mi = 0
+    var new_max = 0
+    var old_val = 0
+    var new_active_phys = null
+    var new_active_last = null
+    var new_pool = null
+
+    if (instructions == null || !is_number(nr_slots) || nr_slots <= 1) return null // nothing to compress
+    n = length(instructions)
+    pinned = 1 + nr_args // slots below this index keep their numbers (they are never put in live_slots)
+
+    // Step 1: build live ranges
+    first_ref = array(nr_slots, -1) // -1 = slot never referenced
+    last_ref = array(nr_slots, -1)
+
+    // Pin this + args
+    k = 0
+    while (k < pinned) {
+      first_ref[k] = 0
+      last_ref[k] = n
+      k = k + 1
+    }
+
+    // Scan instructions for slot references
+    i = 0
+    while (i < n) {
+      instr = instructions[i]
+      if (is_array(instr)) { // non-array entries (labels) carry no slots
+        refs = get_slot_refs(instr)
+        j = 0
+        while (j < length(refs)) {
+          s = instr[refs[j]]
+          if (is_number(s) && s >= 0 && s < nr_slots) {
+            if (first_ref[s] < 0) first_ref[s] = i
+            last_ref[s] = i
+          }
+          j = j + 1
+        }
+      }
+      i = i + 1
+    }
+
+    // Pin captured slots (AFTER scan so last_ref isn't overwritten)
+    if (captured_slots != null) {
+      k = 0
+      while (k < length(captured_slots)) {
+        s = captured_slots[k]
+        if (s >= 0 && s < nr_slots) {
+          if (first_ref[s] < 0) first_ref[s] = 0 // NOTE(review): an already-referenced slot keeps its later first_ref — confirm no closure can touch it earlier
+          last_ref[s] = n // live to the end of the function; the slot may still be renumbered
+        }
+        k = k + 1
+      }
+    }
+
+    // Step 1b: extend for backward jumps (loops)
+    label_map = {} // label text -> instruction index
+    i = 0
+    while (i < n) {
+      instr = instructions[i]
+      if (is_text(instr) && !starts_with(instr, "_nop_")) {
+        label_map[instr] = i
+      }
+      i = i + 1
+    }
+
+    changed = true
+    while (changed) { // repeat until no range grows: extending one loop can expose another
+      changed = false
+      i = 0
+      while (i < n) {
+        instr = instructions[i]
+        if (!is_array(instr)) {
+          i = i + 1
+          continue
+        }
+        op = instr[0]
+        target = null
+        if (op == "jump") {
+          target = instr[1]
+        } else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
+          target = instr[2]
+        }
+        if (target == null || !is_text(target)) {
+          i = i + 1
+          continue
+        }
+        tpos = label_map[target]
+        if (tpos == null || tpos >= i) { // unknown label or forward jump: no loop to account for
+          i = i + 1
+          continue
+        }
+        // Backward jump: extend slots live into loop
+        s = pinned
+        while (s < nr_slots) {
+          if (first_ref[s] >= 0 && first_ref[s] < tpos && last_ref[s] >= tpos && last_ref[s] < i) { // first seen before the loop head, last seen inside the loop
+            last_ref[s] = i
+            changed = true
+          }
+          s = s + 1
+        }
+        i = i + 1
+      }
+    }
+
+    // Step 2: collect the referenced non-pinned slots as three parallel arrays, then sort them
+    live_slots = []
+    live_first = []
+    live_last = []
+    s = pinned
+    while (s < nr_slots) {
+      if (first_ref[s] >= 0) {
+        live_slots[] = s
+        live_first[] = first_ref[s]
+        live_last[] = last_ref[s]
+      }
+      s = s + 1
+    }
+
+    cnt = length(live_slots)
+    i = 1
+    while (i < cnt) { // insertion sort by first_ref, ties broken by slot number
+      key_s = live_slots[i]
+      key_f = live_first[i]
+      key_l = live_last[i]
+      j = i - 1
+      while (j >= 0 && (live_first[j] > key_f || (live_first[j] == key_f && live_slots[j] > key_s))) {
+        live_slots[j + 1] = live_slots[j]
+        live_first[j + 1] = live_first[j]
+        live_last[j + 1] = live_last[j]
+        j = j - 1
+      }
+      live_slots[j + 1] = key_s
+      live_first[j + 1] = key_f
+      live_last[j + 1] = key_l
+      i = i + 1
+    }
+
+    // Linear-scan allocation
+    remap = array(nr_slots) // starts as the identity mapping
+    s = 0
+    while (s < nr_slots) {
+      remap[s] = s
+      s = s + 1
+    }
+
+    pool = [] // physical slots released by expired intervals
+    next_phys = pinned // next never-used physical slot
+    active_phys = []
+    active_last = []
+
+    i = 0
+    while (i < cnt) {
+      // Expire intervals whose last < live_first[i]
+      new_active_phys = []
+      new_active_last = []
+      j = 0
+      while (j < length(active_phys)) {
+        if (active_last[j] < live_first[i]) {
+          pool[] = active_phys[j]
+        } else {
+          new_active_phys[] = active_phys[j]
+          new_active_last[] = active_last[j]
+        }
+        j = j + 1
+      }
+      active_phys = new_active_phys
+      active_last = new_active_last
+
+      // Pick lowest available physical register
+      if (length(pool) > 0) {
+        mi = 0
+        j = 1
+        while (j < length(pool)) {
+          if (pool[j] < pool[mi]) mi = j
+          j = j + 1
+        }
+        phys = pool[mi]
+        new_pool = [] // rebuild the pool without the entry just taken
+        j = 0
+        while (j < length(pool)) {
+          if (j != mi) new_pool[] = pool[j]
+          j = j + 1
+        }
+        pool = new_pool
+      } else {
+        phys = next_phys
+        next_phys = next_phys + 1
+      }
+
+      remap[live_slots[i]] = phys
+      active_phys[] = phys
+      active_last[] = live_last[i]
+      i = i + 1
+    }
+
+    // Compute new nr_slots
+    new_max = pinned
+    s = 0
+    while (s < nr_slots) {
+      if (first_ref[s] >= 0 && remap[s] >= new_max) {
+        new_max = remap[s] + 1
+      }
+      s = s + 1
+    }
+
+    if (new_max >= nr_slots) return null // no slots saved: instructions and nr_slots stay as they were
+
+    // Step 3: apply remap to instructions
+    i = 0
+    while (i < n) {
+      instr = instructions[i]
+      if (is_array(instr)) {
+        refs = get_slot_refs(instr)
+        j = 0
+        while (j < length(refs)) {
+          old_val = instr[refs[j]]
+          if (is_number(old_val) && old_val >= 0 && old_val < nr_slots) {
+            instr[refs[j]] = remap[old_val] // instruction arrays are rewritten in place
+          }
+          j = j + 1
+        }
+      }
+      i = i + 1
+    }
+
+    func.nr_slots = new_max
+    return remap
+  }
+
+  var compress_slots = function(ir) { // compresses slots of every function and main in place; always returns null
+    if (ir == null || ir.main == null) return null
+    var functions = ir.functions != null ? ir.functions : []
+    var func_count = length(functions)
+    var parent_of = null
+    var captured = null
+    var remaps = null
+    var remap_sizes = null
+    var instrs = null
+    var instr = null
+    var child_idx = 0
+    var parent_slot = 0
+    var level = 0
+    var ancestor = 0
+    var caps = null
+    var found = false
+    var anc_remap = null
+    var old_slot = 0
+    var fi = 0
+    var i = 0
+    var j = 0
+    var k = 0
+
+    // Build parent_of: parent_of[i] = parent index, func_count = main
+    parent_of = array(func_count, -1) // -1 = no creating function found
+
+    // Scan main for function instructions
+    if (ir.main != null && ir.main.instructions != null) {
+      instrs = ir.main.instructions
+      i = 0
+      while (i < length(instrs)) {
+        instr = instrs[i]
+        if (is_array(instr) && instr[0] == "function") {
+          child_idx = instr[2] // operand 2 is used as an index into ir.functions
+          if (child_idx >= 0 && child_idx < func_count) {
+            parent_of[child_idx] = func_count
+          }
+        }
+        i = i + 1
+      }
+    }
+
+    // Scan each function for function instructions
+    fi = 0
+    while (fi < func_count) {
+      instrs = functions[fi].instructions
+      if (instrs != null) {
+        i = 0
+        while (i < length(instrs)) {
+          instr = instrs[i]
+          if (is_array(instr) && instr[0] == "function") {
+            child_idx = instr[2]
+            if (child_idx >= 0 && child_idx < func_count) {
+              parent_of[child_idx] = fi
+            }
+          }
+          i = i + 1
+        }
+      }
+      fi = fi + 1
+    }
+
+    // Build captured slots per function
+    captured = array(func_count + 1) // index func_count holds main's list
+    i = 0
+    while (i < func_count + 1) {
+      captured[i] = []
+      i = i + 1
+    }
+
+    fi = 0
+    while (fi < func_count) {
+      instrs = functions[fi].instructions
+      if (instrs != null) {
+        i = 0
+        while (i < length(instrs)) {
+          instr = instrs[i]
+          if (is_array(instr) && (instr[0] == "get" || instr[0] == "put")) {
+            parent_slot = instr[2] // slot number in the ancestor's frame
+            level = instr[3] // how many parent links to follow
+            ancestor = fi
+            j = 0
+            while (j < level && ancestor >= 0) {
+              ancestor = parent_of[ancestor] // NOTE(review): when ancestor is main (func_count) this reads past parent_of — confirm level never exceeds the nesting depth
+              j = j + 1
+            }
+            if (ancestor >= 0) {
+              caps = captured[ancestor]
+              found = false
+              k = 0
+              while (k < length(caps)) {
+                if (caps[k] == parent_slot) {
+                  found = true
+                  k = length(caps) // ends the search loop
+                }
+                k = k + 1
+              }
+              if (!found) caps[] = parent_slot // record each captured slot once
+            }
+          }
+          i = i + 1
+        }
+      }
+      fi = fi + 1
+    }
+
+    // Compress each function and save remap tables
+    remaps = array(func_count + 1)
+    remap_sizes = array(func_count + 1, 0) // nr_slots as it was before compression
+
+    fi = 0
+    while (fi < func_count) {
+      remap_sizes[fi] = functions[fi].nr_slots // read first: compress_one_fn overwrites nr_slots
+      remaps[fi] = compress_one_fn(functions[fi], captured[fi])
+      fi = fi + 1
+    }
+
+    if (ir.main != null) {
+      remap_sizes[func_count] = ir.main.nr_slots
+      remaps[func_count] = compress_one_fn(ir.main, captured[func_count])
+    }
+
+    // Fix get/put parent_slot references using ancestor remap tables
+    fi = 0
+    while (fi < func_count) {
+      instrs = functions[fi].instructions
+      if (instrs != null) {
+        i = 0
+        while (i < length(instrs)) {
+          instr = instrs[i]
+          if (is_array(instr) && (instr[0] == "get" || instr[0] == "put")) {
+            level = instr[3]
+            ancestor = fi
+            j = 0
+            while (j < level && ancestor >= 0) {
+              ancestor = parent_of[ancestor]
+              j = j + 1
+            }
+            if (ancestor >= 0 && remaps[ancestor] != null) { // a null remap means that ancestor was not renumbered
+              anc_remap = remaps[ancestor]
+              old_slot = instr[2]
+              if (old_slot >= 0 && old_slot < remap_sizes[ancestor]) {
+                instr[2] = anc_remap[old_slot]
+              }
+            }
+          }
+          i = i + 1
+        }
+      }
+      fi = fi + 1
+    }
+
+    return null
+  }
+
  // =========================================================
  // Compose all passes
  // =========================================================
@@ -1090,6 +1545,9 @@ var streamline = function(ir, log) {
    }
  }

+  // Compress slots across all functions (must run after per-function passes)
+  compress_slots(ir)
+
  return ir
 }