From ad419797b4c9fbf5ebfccf14cbeecc2f26baf941 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Tue, 17 Feb 2026 17:40:44 -0600 Subject: [PATCH 1/9] native function type --- build.cm | 67 +---- meson.build | 3 +- qbe_emit.cm | 144 ++++++++-- source/mach.c | 34 ++- source/qbe_helpers.c | 584 ++++++++++++++++++++++++++------------ source/quickjs-internal.h | 9 + source/runtime.c | 6 + vm_suite.ce | 21 ++ 8 files changed, 603 insertions(+), 265 deletions(-) diff --git a/build.cm b/build.cm index 41ae6af0..fdff7ce9 100644 --- a/build.cm +++ b/build.cm @@ -509,62 +509,21 @@ Build.build_static = function(packages, target, output, buildtype) { // il_parts: {data: text, functions: [text, ...]} // cc: C compiler path // tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_) -function compile_native_batched(il_parts, cc, tmp_prefix) { - var nfuncs = length(il_parts.functions) - var nbatch = 8 - var o_paths = [] - var s_paths = [] - var asm_cmds = [] - var batch_fns = null - var batch_il = null - var asm_text = null - var s_path = null - var o_path = null - var end = 0 - var bi = 0 - var fi = 0 - var ai = 0 - var rc = null - var parallel_cmd = null +function compile_native_single(il_parts, cc, tmp_prefix) { var helpers_il = (il_parts.helpers && length(il_parts.helpers) > 0) ? text(il_parts.helpers, "\n") : "" - var prefix = null - - if (nfuncs < nbatch) nbatch = nfuncs - if (nbatch < 1) nbatch = 1 - - // Generate .s files: run QBE on each batch - while (bi < nbatch) { - batch_fns = [] - end = nfuncs * (bi + 1) / nbatch - while (fi < end) { - batch_fns[] = il_parts.functions[fi] - fi = fi + 1 - } - // Batch 0 includes helper functions; others reference them as external symbols - prefix = (bi == 0 && helpers_il != "") ? 
helpers_il + "\n\n" : "" - batch_il = il_parts.data + "\n\n" + prefix + text(batch_fns, "\n") - asm_text = os.qbe(batch_il) - s_path = tmp_prefix + '_b' + text(bi) + '.s' - o_path = tmp_prefix + '_b' + text(bi) + '.o' - fd.slurpwrite(s_path, stone(blob(asm_text))) - s_paths[] = s_path - o_paths[] = o_path - bi = bi + 1 - } - - // Assemble all batches in parallel - while (ai < length(s_paths)) { - asm_cmds[] = cc + ' -c ' + s_paths[ai] + ' -o ' + o_paths[ai] - ai = ai + 1 - } - parallel_cmd = text(asm_cmds, ' & ') + ' & wait' - rc = os.system(parallel_cmd) + var all_fns = text(il_parts.functions, "\n") + var full_il = il_parts.data + "\n\n" + helpers_il + "\n\n" + all_fns + var asm_text = os.qbe(full_il) + var s_path = tmp_prefix + '.s' + var o_path = tmp_prefix + '.o' + var rc = null + fd.slurpwrite(s_path, stone(blob(asm_text))) + rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path) if (rc != 0) { - print('Parallel assembly failed'); disrupt + print('Assembly failed'); disrupt } - - return o_paths + return [o_path] } // Post-process QBE IL: insert dead labels after ret/jmp (QBE requirement) @@ -651,7 +610,7 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var tmp = '/tmp/cell_native_' + hash var rt_o_path = '/tmp/cell_qbe_rt.o' - var o_paths = compile_native_batched(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp) // Compile QBE runtime stubs if needed var rc = null @@ -734,7 +693,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var tmp = '/tmp/cell_native_' + hash var rt_o_path = '/tmp/cell_qbe_rt.o' - var o_paths = compile_native_batched(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp) // Compile QBE runtime stubs if needed var rc = null diff --git a/meson.build b/meson.build index 17b96f7a..28ea6cc9 100644 --- a/meson.build +++ b/meson.build @@ -38,8 +38,7 @@ if host_machine.system() == 'darwin' foreach fkit : fworks deps += dependency('appleframeworks', modules: 
fkit) endforeach - # 32MB stack for deep native recursion (CPS patterns without TCO) - link += ['-Wl,-stack_size,0x2000000'] + # Native code uses dispatch loop (no C stack recursion) endif if host_machine.system() == 'playdate' diff --git a/qbe_emit.cm b/qbe_emit.cm index c0f24deb..23924b2c 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -475,10 +475,10 @@ ${sw("w", "%fp2", "%result_slot", "%r")} ret 0 }` - // function(ctx, fp, dest, fn_idx, arity) - h[] = `export function l $__function_ss(l %ctx, l %fp, l %dest, l %fn_idx, l %arity) { + // function(ctx, fp, dest, fn_idx, arity, nr_slots) + h[] = `export function l $__function_ss(l %ctx, l %fp, l %dest, l %fn_idx, l %arity, l %nr_slots) { @entry - %r =l call $cell_rt_make_function(l %ctx, l %fn_idx, l %fp, l %arity) + %r =l call $cell_rt_make_function(l %ctx, l %fn_idx, l %fp, l %arity, l %nr_slots) ${alloc_tail("%r")} }` @@ -680,11 +680,74 @@ var qbe_emit = function(ir, qbe, export_name) { var tol = null var fn_arity = 0 var arity_tmp = null + var fn_nr_slots = 0 + var invoke_count = 0 + var si = 0 + var scan = null + var scan_op = null + var has_invokes = false + var seg_counter = 0 + var ri = 0 + var seg_num = 0 + var resume_val = 0 + + // Pre-scan: count invoke/tail_invoke points to assign segment numbers. + // Must skip dead code (instructions after terminators) the same way + // the main emission loop does, otherwise we create jump table entries + // for segments that never get emitted. 
+ var scan_dead = false + si = 0 + while (si < length(instrs)) { + scan = instrs[si] + si = si + 1 + if (is_text(scan)) { + // Labels reset dead code state (unless they're nop pseudo-labels) + if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_")) + scan_dead = false + continue + } + if (scan_dead) continue + if (!is_array(scan)) continue + scan_op = scan[0] + if (scan_op == "invoke" || scan_op == "tail_invoke") { + invoke_count = invoke_count + 1 + } + // Track terminators — same set as in the main loop + if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "disrupt") { + scan_dead = true + } + } + has_invokes = invoke_count > 0 // Function signature: (ctx, frame_ptr) → JSValue emit(`export function l $${name}(l %ctx, l %fp) {`) emit("@entry") + // Resume dispatch: if this function has invoke points, read the segment + // number from frame->address and jump to the right resume point. + // frame->address is at fp - 8 (last field before slots[]). + if (has_invokes) { + emit(" %addr_ptr =l sub %fp, 8") + emit(" %addr_raw =l loadl %addr_ptr") + // address is stored as JS_NewInt32 tagged value: n << 1 + emit(" %addr =l sar %addr_raw, 1") + emit(" %resume =l shr %addr, 16") + emit(` jnz %resume, @_rcheck1, @_seg0`) + ri = 1 + while (ri <= invoke_count) { + emit(`@_rcheck${text(ri)}`) + emit(` %_rc${text(ri)} =w ceql %resume, ${text(ri)}`) + if (ri < invoke_count) { + emit(` jnz %_rc${text(ri)}, @_seg${text(ri)}, @_rcheck${text(ri + 1)}`) + } else { + // Last check — if no match, fall through to seg0 + emit(` jnz %_rc${text(ri)}, @_seg${text(ri)}, @_seg0`) + } + ri = ri + 1 + } + emit("@_seg0") + } + // GC-safe slot access: every read/write goes through frame memory. // %fp may become stale after GC-triggering calls — use refresh_fp(). 
var s_read = function(slot) { @@ -1228,13 +1291,51 @@ var qbe_emit = function(ir, qbe, export_name) { continue } if (op == "invoke") { - emit(` %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) - emit_exc_check() + // Dispatch loop invoke: store resume info, signal, return 0 + seg_counter = seg_counter + 1 + seg_num = seg_counter + // Store (seg_num << 16 | result_slot) as tagged int in frame->address + resume_val = seg_num * 65536 + a2 + // frame->address is at fp - 8, store as tagged int (n << 1) + emit(` %_inv_addr${text(seg_num)} =l sub %fp, 8`) + emit(` storel ${text(resume_val * 2)}, %_inv_addr${text(seg_num)}`) + emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) + emit(" ret 0") + emit(`@_seg${text(seg_num)}`) + // Check for exception after dispatch loop resumes us + p = fresh() + emit(` %${p} =w call $JS_HasException(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jnz %${p}, @disruption_handler, @${p}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${p}, @_exc_ret, @${p}_ok`) + } + emit(`@${p}_ok`) + last_was_term = false continue } if (op == "tail_invoke") { - emit(` %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) - emit_exc_check() + // Same as invoke — dispatch loop regular call with resume + seg_counter = seg_counter + 1 + seg_num = seg_counter + resume_val = seg_num * 65536 + a2 + emit(` %_tinv_addr${text(seg_num)} =l sub %fp, 8`) + emit(` storel ${text(resume_val * 2)}, %_tinv_addr${text(seg_num)}`) + emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) + emit(" ret 0") + emit(`@_seg${text(seg_num)}`) + // Check for exception after dispatch loop resumes us + p = fresh() + emit(` %${p} =w call $JS_HasException(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jnz %${p}, @disruption_handler, @${p}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${p}, @_exc_ret, @${p}_ok`) + } + emit(`@${p}_ok`) + last_was_term = false continue } if (op == "goframe") { @@ 
-1243,22 +1344,13 @@ var qbe_emit = function(ir, qbe, export_name) { continue } if (op == "goinvoke") { - v = s_read(a1) + // Dispatch loop tail call: signal tail call and return 0 + // Use 0xFFFF as ret_slot (no result to store — it's a tail call) p = fresh() - emit(` %${p} =l call $cell_rt_goinvoke(l %ctx, l ${v})`) - chk = fresh() - emit(` %${chk} =w ceql %${p}, 15`) - if (has_handler) { - emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) - emit(`@${chk}_ok`) - refresh_fp() - emit(` ret %${p}`) - } else { - needs_exc_ret = true - emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) - emit(`@${chk}_ok`) - emit(` ret %${p}`) - } + emit(` %${p}_addr =l sub %fp, 8`) + emit(` storel ${text(65535 * 2)}, %${p}_addr`) + emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) + emit(" ret 0") last_was_term = true continue } @@ -1267,10 +1359,12 @@ var qbe_emit = function(ir, qbe, export_name) { if (op == "function") { fn_arity = 0 + fn_nr_slots = 0 if (a2 >= 0 && a2 < length(ir.functions)) { fn_arity = ir.functions[a2].nr_args + fn_nr_slots = ir.functions[a2].nr_slots } - emit(` %fp =l call $__function_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(fn_arity)})`) + emit(` %fp =l call $__function_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(fn_arity)}, l ${text(fn_nr_slots)})`) emit_exc_check() continue } @@ -1407,6 +1501,10 @@ var qbe_emit = function(ir, qbe, export_name) { compile_fn(ir.main, -1, true) fn_bodies[] = text(out, "\n") + // Export nr_slots for main function so the module loader can use right-sized frames + var main_name = export_name ? 
sanitize(export_name) : "cell_main" + push(data_out, `export data $${main_name}_nr_slots = { w ${text(ir.main.nr_slots)} }`) + return { data: text(data_out, "\n"), functions: fn_bodies, diff --git a/source/mach.c b/source/mach.c index 7dbf48b2..3d05f17d 100644 --- a/source/mach.c +++ b/source/mach.c @@ -490,6 +490,32 @@ JSValue js_new_register_function(JSContext *ctx, JSCodeRegister *code, JSValue e return JS_MKPTR(fn); } +/* Create a native (QBE-compiled) function */ +JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, + uint16_t nr_slots, int arity, JSValue outer_frame) { + JSGCRef frame_ref; + JS_PushGCRef(ctx, &frame_ref); + frame_ref.val = outer_frame; + + JSFunction *fn = js_mallocz(ctx, sizeof(JSFunction)); + if (!fn) { + JS_PopGCRef(ctx, &frame_ref); + return JS_EXCEPTION; + } + + fn->header = objhdr_make(0, OBJ_FUNCTION, 0, 0, 0, 0); + fn->kind = JS_FUNC_KIND_NATIVE; + fn->length = arity; + fn->name = JS_NULL; + fn->u.native.fn_ptr = fn_ptr; + fn->u.native.dl_handle = dl_handle; + fn->u.native.nr_slots = nr_slots; + fn->u.native.outer_frame = frame_ref.val; + + JS_PopGCRef(ctx, &frame_ref); + return JS_MKPTR(fn); +} + /* Binary operations helper */ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { /* Fast path for integers */ @@ -1924,12 +1950,14 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, env = fn->u.reg.env_record; pc = code->entry_point; } else { - /* C or bytecode function: args already in fr->slots (GC-protected via frame chain) */ + /* C, native, or bytecode function */ ctx->reg_current_frame = frame_ref.val; ctx->current_register_pc = pc > 0 ? 
pc - 1 : 0; JSValue ret; if (fn->kind == JS_FUNC_KIND_C) ret = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); + else if (fn->kind == JS_FUNC_KIND_NATIVE) + ret = cell_native_dispatch(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); else ret = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1], 0); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); @@ -2007,12 +2035,14 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, pc = code->entry_point; } } else { - /* C/bytecode function: call it, then return result to our caller */ + /* C, native, or bytecode function: call it, then return result to our caller */ ctx->reg_current_frame = frame_ref.val; ctx->current_register_pc = pc > 0 ? pc - 1 : 0; JSValue ret; if (fn->kind == JS_FUNC_KIND_C) ret = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); + else if (fn->kind == JS_FUNC_KIND_NATIVE) + ret = cell_native_dispatch(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); else ret = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1], 0); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 4f189280..547f3d2f 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -320,25 +320,34 @@ JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { } /* --- Closure access --- - Slot 511 in each frame stores the magic ID (registry index) of the - function that owns this frame. cell_rt_get/put_closure re-derive - the enclosing frame from the function's GC ref at call time, so - pointers stay valid even if GC moves frames. */ + Walk the outer_frame chain on JSFunction (JS_FUNC_KIND_NATIVE). + The frame's function field links to the JSFunction, whose + u.native.outer_frame points to the enclosing frame. + GC traces outer_frame naturally — no registry needed. 
*/ -#define QBE_FRAME_OUTER_SLOT 511 - -static JSValue *derive_outer_fp(int magic); +/* Get the outer frame's slots from a frame pointer. + The frame's function must be JS_FUNC_KIND_NATIVE. */ +static JSValue *get_outer_frame_slots(JSValue *fp) { + /* fp points to frame->slots[0]; frame header is before it */ + JSFrameRegister *frame = (JSFrameRegister *)((char *)fp - offsetof(JSFrameRegister, slots)); + if (JS_IsNull(frame->function)) + return NULL; + JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); + if (fn->kind != JS_FUNC_KIND_NATIVE) + return NULL; + JSValue outer = fn->u.native.outer_frame; + if (JS_IsNull(outer)) + return NULL; + JSFrameRegister *outer_frame = (JSFrameRegister *)JS_VALUE_GET_PTR(outer); + return (JSValue *)outer_frame->slots; +} JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth, int64_t slot) { + (void)ctx; JSValue *frame = (JSValue *)fp; for (int64_t d = 0; d < depth; d++) { - /* fp[511] stores the magic ID (registry index) of the function - that owns this frame. derive_outer_fp re-derives the enclosing - frame from the function's GC ref, so it's always current even - if GC moved the frame. */ - int magic = (int)(int64_t)frame[QBE_FRAME_OUTER_SLOT]; - frame = derive_outer_fp(magic); + frame = get_outer_frame_slots(frame); if (!frame) return JS_NULL; } @@ -347,42 +356,26 @@ JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth, void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth, int64_t slot) { + (void)ctx; JSValue *frame = (JSValue *)fp; for (int64_t d = 0; d < depth; d++) { - int magic = (int)(int64_t)frame[QBE_FRAME_OUTER_SLOT]; - frame = derive_outer_fp(magic); + frame = get_outer_frame_slots(frame); if (!frame) return; } frame[slot] = val; } /* --- GC-managed AOT frame stack --- - Each AOT function call pushes a GC ref so the GC can find and - update frame pointers when it moves objects. cell_rt_refresh_fp - re-derives the slot pointer after any GC-triggering call. 
*/ + Each native dispatch loop pushes a GC ref so the GC can find and + update the current frame pointer when it moves objects. + cell_rt_refresh_fp re-derives the slot pointer after any GC call. */ -#define MAX_AOT_DEPTH 65536 +#define MAX_AOT_DEPTH 8192 static JSGCRef g_aot_gc_refs[MAX_AOT_DEPTH]; static int g_aot_depth = 0; -/* Check remaining C stack space to prevent segfaults from deep recursion */ -static int stack_space_ok(void) { -#ifdef __APPLE__ - char local; - void *stack_addr = pthread_get_stackaddr_np(pthread_self()); - size_t stack_size = pthread_get_stacksize_np(pthread_self()); - /* stack_addr is the TOP of the stack (highest address); stack grows down */ - uintptr_t stack_bottom = (uintptr_t)stack_addr - stack_size; - uintptr_t current = (uintptr_t)&local; - /* Keep 128KB of reserve for unwinding and error handling */ - return (current - stack_bottom) > (128 * 1024); -#else - return g_aot_depth < MAX_AOT_DEPTH; -#endif -} - JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { - if (g_aot_depth >= MAX_AOT_DEPTH || !stack_space_ok()) { + if (g_aot_depth >= MAX_AOT_DEPTH) { JS_ThrowTypeError(ctx, "native call stack overflow (depth %d)", g_aot_depth); return NULL; } @@ -411,9 +404,7 @@ JSValue *cell_rt_refresh_fp(JSContext *ctx) { return (JSValue *)frame->slots; } -/* Combined refresh + exception check in a single call. - Returns the refreshed fp, or NULL if there is a pending exception. - This avoids QBE register-allocation issues from two consecutive calls. */ +/* Combined refresh + exception check in a single call. */ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { if (JS_HasException(ctx)) return NULL; @@ -439,126 +430,346 @@ void cell_rt_leave_frame(JSContext *ctx) { typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); -/* Per-module function registry. - Each native .cm module gets its own dylib. 
When a module creates closures - via cell_rt_make_function, we record the dylib handle so the trampoline - can look up the correct cell_fn_N in the right dylib. */ -#define MAX_NATIVE_FN 32768 - -static struct { - void *dl_handle; - int fn_idx; - JSGCRef frame_ref; /* independent GC ref for enclosing frame */ - int has_frame_ref; -} g_native_fn_registry[MAX_NATIVE_FN]; - -static int g_native_fn_count = 0; - -/* Set before executing a native module's cell_main */ +/* Set before executing a native module's cell_main — + used by cell_rt_make_function to resolve fn_ptr via dlsym */ static void *g_current_dl_handle = NULL; -/* Derive the outer frame's slots pointer from the closure's own GC ref. - Each closure keeps an independent GC ref so the enclosing frame - survives even after cell_rt_leave_frame pops the stack ref. */ -static JSValue *derive_outer_fp(int magic) { - if (!g_native_fn_registry[magic].has_frame_ref) return NULL; - JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR( - g_native_fn_registry[magic].frame_ref.val); - return (JSValue *)frame->slots; +/* ============================================================ + Dispatch loop — the core of native function execution. + Each compiled cell_fn_N returns to this loop when it needs + to call another function (instead of recursing via C stack). + ============================================================ */ + +/* Pending call state — set by cell_rt_signal_call / cell_rt_signal_tail_call, + read by the dispatch loop. 
*/ +static JSValue g_pending_callee_frame = 0; /* JSFrameRegister ptr */ +static int g_pending_is_tail = 0; + +void cell_rt_signal_call(JSContext *ctx, void *fp, int64_t frame_slot) { + (void)ctx; + JSValue *slots = (JSValue *)fp; + g_pending_callee_frame = slots[frame_slot]; + g_pending_is_tail = 0; } -static void reclaim_native_fns(JSContext *ctx, int saved_count) { - /* Free GC refs for temporary closures created during a call */ - for (int i = saved_count; i < g_native_fn_count; i++) { - if (g_native_fn_registry[i].has_frame_ref) { - JS_DeleteGCRef(ctx, &g_native_fn_registry[i].frame_ref); - g_native_fn_registry[i].has_frame_ref = 0; - } - } - g_native_fn_count = saved_count; +void cell_rt_signal_tail_call(JSContext *ctx, void *fp, int64_t frame_slot) { + (void)ctx; + JSValue *slots = (JSValue *)fp; + g_pending_callee_frame = slots[frame_slot]; + g_pending_is_tail = 1; } -static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val, - int argc, JSValue *argv, int magic) { - if (magic < 0 || magic >= g_native_fn_count) - return JS_ThrowTypeError(ctx, "invalid native function id %d", magic); +/* Entry point called from JS_CallInternal / JS_Call / MACH_INVOKE + for JS_FUNC_KIND_NATIVE functions. 
*/ +JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, + JSValue this_obj, int argc, JSValue *argv) { + JSFunction *f = JS_VALUE_GET_FUNCTION(func_obj); + cell_compiled_fn fn = (cell_compiled_fn)f->u.native.fn_ptr; + int nr_slots = f->u.native.nr_slots; + int arity = f->length; - void *handle = g_native_fn_registry[magic].dl_handle; - int fn_idx = g_native_fn_registry[magic].fn_idx; + /* Root func_obj across allocation — GC can move it */ + JSGCRef func_ref; + JS_PushGCRef(ctx, &func_ref); + func_ref.val = func_obj; - char name[64]; - snprintf(name, sizeof(name), "cell_fn_%d", fn_idx); - - cell_compiled_fn fn = (cell_compiled_fn)dlsym(handle, name); - if (!fn) - return JS_ThrowTypeError(ctx, "native function %s not found in dylib", name); - - /* Allocate GC-managed frame: slot 0 = this, slots 1..argc = args */ - JSValue *fp = cell_rt_enter_frame(ctx, 512); - if (!fp) return JS_EXCEPTION; - fp[0] = this_val; - for (int i = 0; i < argc && i < 510; i++) - fp[1 + i] = argv[i]; - - /* Store the magic ID (registry index) so cell_rt_get/put_closure - can re-derive the enclosing frame from the GC ref at call time, - surviving GC moves */ - fp[QBE_FRAME_OUTER_SLOT] = (JSValue)(int64_t)magic; - - /* Set g_current_dl_handle so any closures created during this call - (e.g. inner functions returned by factory functions) are registered - against the correct dylib */ - void *prev_handle = g_current_dl_handle; - g_current_dl_handle = handle; - - /* At top-level (depth 1 = this is the outermost native call), - save the fn count so we can reclaim temporary closures after */ - int saved_fn_count = (g_aot_depth == 1) ? g_native_fn_count : -1; - - JSValue result = fn(ctx, fp); - cell_rt_leave_frame(ctx); - g_current_dl_handle = prev_handle; - - /* Reclaim temporary closures created during this top-level call */ - if (saved_fn_count >= 0) - reclaim_native_fns(ctx, saved_fn_count); - - if (result == JS_EXCEPTION) { - /* Ensure there is a pending exception. 
QBE @_exc_ret returns 15 - but may not have set one (e.g. if cell_rt_enter_frame failed). */ - if (!JS_HasException(ctx)) - JS_Throw(ctx, JS_NULL); + /* Allocate initial frame */ + JSValue *fp = cell_rt_enter_frame(ctx, nr_slots); + if (!fp) { + JS_PopGCRef(ctx, &func_ref); return JS_EXCEPTION; } - return result; -} -JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp, - int64_t nr_args) { - (void)outer_fp; - if (g_native_fn_count >= MAX_NATIVE_FN) - return JS_ThrowTypeError(ctx, "too many native functions (max %d)", MAX_NATIVE_FN); + /* Re-derive func_obj after potential GC */ + func_obj = func_ref.val; + JS_PopGCRef(ctx, &func_ref); - int global_id = g_native_fn_count++; - g_native_fn_registry[global_id].dl_handle = g_current_dl_handle; - g_native_fn_registry[global_id].fn_idx = (int)fn_idx; + /* Set up frame: this in slot 0, args in slots 1..N */ + fp[0] = this_obj; + int copy = (argc < arity) ? argc : arity; + if (copy < 0) copy = argc; /* variadic: copy all */ + for (int i = 0; i < copy && i < nr_slots - 1; i++) + fp[1 + i] = argv[i]; - /* Create independent GC ref so the enclosing frame survives - even after cell_rt_leave_frame pops the stack ref */ - if (g_aot_depth > 0) { - JSGCRef *ref = &g_native_fn_registry[global_id].frame_ref; - JS_AddGCRef(ctx, ref); - ref->val = g_aot_gc_refs[g_aot_depth - 1].val; - g_native_fn_registry[global_id].has_frame_ref = 1; - } else { - g_native_fn_registry[global_id].has_frame_ref = 0; + /* Link function to frame for closure access */ + JSFrameRegister *frame = (JSFrameRegister *)((char *)fp - offsetof(JSFrameRegister, slots)); + frame->function = func_obj; + + int base_depth = g_aot_depth; /* remember entry depth for return detection */ + + for (;;) { + g_pending_callee_frame = 0; + + JSValue result = fn(ctx, fp); + + /* Re-derive frame after potential GC */ + JSValue frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue 
*)frame->slots; + + if (g_pending_callee_frame != 0) { + /* Function signaled a call — dispatch it */ + JSValue callee_frame_val = g_pending_callee_frame; + g_pending_callee_frame = 0; + JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_frame_val); + int callee_argc = (int)objhdr_cap56(callee_fr->header); + callee_argc = (callee_argc >= 2) ? callee_argc - 2 : 0; + JSValue callee_fn_val = callee_fr->function; + + if (!JS_IsFunction(callee_fn_val)) { + JS_ThrowTypeError(ctx, "not a function"); + /* Resume caller with exception pending */ + JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + continue; + } + + JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(callee_fn_val); + + if (callee_fn->kind == JS_FUNC_KIND_NATIVE) { + /* Native-to-native call — no C stack growth */ + cell_compiled_fn callee_ptr = (cell_compiled_fn)callee_fn->u.native.fn_ptr; + int callee_slots = callee_fn->u.native.nr_slots; + + if (g_pending_is_tail) { + /* Tail call: reuse or replace current frame */ + if (callee_slots <= (int)objhdr_cap56(frame->header)) { + /* Reuse current frame */ + int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; + if (cc < 0) cc = callee_argc; + frame->slots[0] = callee_fr->slots[0]; /* this */ + for (int i = 0; i < cc && i < callee_slots - 1; i++) + frame->slots[1 + i] = callee_fr->slots[1 + i]; + /* Null out remaining slots */ + int cur_slots = (int)objhdr_cap56(frame->header); + for (int i = 1 + cc; i < cur_slots; i++) + frame->slots[i] = JS_NULL; + frame->function = callee_fn_val; + frame->address = JS_NewInt32(ctx, 0); + fn = callee_ptr; + /* fp stays the same (same frame) */ + } else { + /* Need bigger frame — save callee info, pop+push */ + JSValue saved_caller = frame->caller; + JSValue callee_this = callee_fr->slots[0]; + int cc = (callee_argc < callee_fn->length) ? 
callee_argc : callee_fn->length; + if (cc < 0) cc = callee_argc; + JSValue callee_args[cc > 0 ? cc : 1]; + for (int i = 0; i < cc; i++) + callee_args[i] = callee_fr->slots[1 + i]; + + /* Pop old frame */ + cell_rt_leave_frame(ctx); + + /* Push new right-sized frame */ + JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); + if (!new_fp) + return JS_EXCEPTION; + JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); + new_frame->function = callee_fn_val; + new_frame->caller = saved_caller; + new_frame->slots[0] = callee_this; + for (int i = 0; i < cc && i < callee_slots - 1; i++) + new_frame->slots[1 + i] = callee_args[i]; + frame = new_frame; + fp = new_fp; + fn = callee_ptr; + } + } else { + /* Regular call: push new frame, link caller */ + int ret_info = JS_VALUE_GET_INT(frame->address); + int resume_seg = ret_info >> 16; + int ret_slot = ret_info & 0xFFFF; + + /* Save callee info before allocation */ + JSValue callee_this = callee_fr->slots[0]; + int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; + if (cc < 0) cc = callee_argc; + JSValue callee_args[cc > 0 ? 
cc : 1]; + for (int i = 0; i < cc; i++) + callee_args[i] = callee_fr->slots[1 + i]; + + JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); + if (!new_fp) { + /* Resume caller with exception pending */ + frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + continue; + } + + /* Re-derive caller frame after alloc */ + frame_val = g_aot_gc_refs[g_aot_depth - 2].val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + + JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); + new_frame->function = callee_fn_val; + new_frame->caller = JS_MKPTR(frame); + new_frame->slots[0] = callee_this; + for (int i = 0; i < cc && i < callee_slots - 1; i++) + new_frame->slots[1 + i] = callee_args[i]; + + /* Save return address in caller */ + frame->address = JS_NewInt32(ctx, (resume_seg << 16) | ret_slot); + + frame = new_frame; + fp = new_fp; + fn = callee_ptr; + } + } else { + /* Non-native callee (C function, register VM, etc.) — + call it via the standard path and store the result */ + JSValue ret; + if (callee_fn->kind == JS_FUNC_KIND_C) + ret = js_call_c_function(ctx, callee_fn_val, callee_fr->slots[0], + callee_argc, &callee_fr->slots[1]); + else + ret = JS_CallInternal(ctx, callee_fn_val, callee_fr->slots[0], + callee_argc, &callee_fr->slots[1], 0); + + /* Re-derive frame after call */ + frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + + if (JS_IsException(ret)) { + /* Non-native callee threw — resume caller with exception pending. + The caller's generated code checks JS_HasException at resume. */ + if (!JS_HasException(ctx)) + JS_Throw(ctx, JS_NULL); + /* fn and fp still point to the calling native function's frame. 
+ Just resume it — it will detect the exception. */ + JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + continue; + } + /* Clear stale exception */ + if (JS_HasException(ctx)) + JS_GetException(ctx); + + if (g_pending_is_tail) { + /* Tail call to non-native: return its result up the chain */ + /* Pop current frame and return to caller */ + if (g_aot_depth <= base_depth) { + cell_rt_leave_frame(ctx); + return ret; + } + /* Pop current frame, return to caller frame */ + JSValue caller_val = frame->caller; + cell_rt_leave_frame(ctx); + if (JS_IsNull(caller_val) || g_aot_depth < base_depth) { + return ret; + } + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(caller_val); + /* Update GC ref to point to caller */ + g_aot_gc_refs[g_aot_depth - 1].val = caller_val; + fp = (JSValue *)frame->slots; + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = ret; + /* Resume caller */ + JSFunction *caller_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)caller_fn->u.native.fn_ptr; + } else { + /* Regular call: store result and resume current function */ + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = ret; + /* fn stays the same — we resume the same function at next segment */ + JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)cur_fn->u.native.fn_ptr; + } + } + continue; + } + + /* No pending call — function returned a value or exception */ + if (result == JS_EXCEPTION) { + /* Exception: pop this frame and propagate to caller. + The caller's generated code has exception checks at resume points. 
*/ + if (!JS_HasException(ctx)) + JS_Throw(ctx, JS_NULL); + + if (g_aot_depth <= base_depth) { + cell_rt_leave_frame(ctx); + return JS_EXCEPTION; + } + + JSValue exc_caller_val = frame->caller; + cell_rt_leave_frame(ctx); + + if (JS_IsNull(exc_caller_val) || g_aot_depth < base_depth) { + return JS_EXCEPTION; + } + + /* Resume caller — it will check JS_HasException and branch to handler */ + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(exc_caller_val); + g_aot_gc_refs[g_aot_depth - 1].val = exc_caller_val; + fp = (JSValue *)frame->slots; + + JSFunction *exc_caller_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)exc_caller_fn->u.native.fn_ptr; + continue; + } + + /* Normal return — pop frame and store result in caller */ + if (g_aot_depth <= base_depth) { + cell_rt_leave_frame(ctx); + return result; + } + + JSValue caller_val = frame->caller; + cell_rt_leave_frame(ctx); + + if (JS_IsNull(caller_val) || g_aot_depth < base_depth) { + return result; + } + + /* Return to caller frame */ + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(caller_val); + g_aot_gc_refs[g_aot_depth - 1].val = caller_val; + fp = (JSValue *)frame->slots; + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = result; + + JSFunction *caller_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)caller_fn->u.native.fn_ptr; + continue; } - - return JS_NewCFunction2(ctx, (JSCFunction *)cell_fn_trampoline, "native_fn", - (int)nr_args, JS_CFUNC_generic_magic, global_id); } -/* --- Frame-based function calling --- */ +/* Create a native function object from a compiled fn_idx. + Called from QBE-generated code during function creation. 
*/ +JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp, + int64_t nr_args, int64_t nr_slots) { + if (!g_current_dl_handle) + return JS_ThrowTypeError(ctx, "no native module loaded"); + + /* Resolve fn_ptr via dlsym at creation time — cached in the function object */ + char name[64]; + snprintf(name, sizeof(name), "cell_fn_%lld", (long long)fn_idx); + void *fn_ptr = dlsym(g_current_dl_handle, name); + if (!fn_ptr) + return JS_ThrowTypeError(ctx, "native function %s not found in dylib", name); + + /* Get the current frame as outer_frame for closures */ + JSValue outer_frame = JS_NULL; + if (g_aot_depth > 0) + outer_frame = g_aot_gc_refs[g_aot_depth - 1].val; + + return js_new_native_function(ctx, fn_ptr, g_current_dl_handle, + (uint16_t)nr_slots, (int)nr_args, outer_frame); +} + +/* --- Frame-based function calling --- + Still used by QBE-generated code for building call frames + before signaling the dispatch loop. */ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) { if (!JS_IsFunction(fn)) { @@ -578,6 +789,7 @@ void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) { fr->slots[idx] = val; } +/* cell_rt_invoke — still used for non-dispatch-loop paths (e.g. old code) */ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) { if (frame_val == JS_EXCEPTION) return JS_EXCEPTION; JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); @@ -594,11 +806,10 @@ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) { JSValue result; if (fn->kind == JS_FUNC_KIND_C) { - /* Match MACH_INVOKE: C functions go directly to js_call_c_function, - bypassing JS_Call's arity check. Extra args are silently available. 
*/ result = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); + } else if (fn->kind == JS_FUNC_KIND_NATIVE) { + result = cell_native_dispatch(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); } else { - /* Register/bytecode functions — use JS_CallInternal (no arity gate) */ JSValue args[c_argc > 0 ? c_argc : 1]; for (int i = 0; i < c_argc; i++) args[i] = fr->slots[i + 1]; @@ -607,9 +818,6 @@ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) { if (JS_IsException(result)) return JS_EXCEPTION; - /* Clear any stale exception left by functions that returned a valid - value despite internal error (e.g., sign("text") returns null - but JS_ToFloat64 leaves an exception flag) */ if (JS_HasException(ctx)) JS_GetException(ctx); return result; @@ -765,8 +973,11 @@ void cell_rt_clear_exception(JSContext *ctx) { /* --- Disruption --- */ +/* Disrupt: silently set exception flag like the bytecode VM does. + Does NOT call JS_ThrowTypeError — that would print to stderr + even when a disruption handler will catch it. */ void cell_rt_disrupt(JSContext *ctx) { - JS_ThrowTypeError(ctx, "type error in native code"); + JS_Throw(ctx, JS_TRUE); } /* --- in: key in obj --- */ @@ -793,67 +1004,72 @@ JSValue cell_rt_regexp(JSContext *ctx, const char *pattern, const char *flags) { Looks up cell_main, builds a heap-allocated frame, sets g_current_dl_handle so closures register in the right module. */ -JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env) { - cell_compiled_fn fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main"); - if (!fn) - return JS_ThrowTypeError(ctx, "cell_main not found in native module dylib"); - - /* Set current handle so cell_rt_make_function registers closures - against this module's dylib */ +/* Helper: run a native module's entry point through the dispatch loop. + Creates a temporary JS_FUNC_KIND_NATIVE function so that the full + dispatch loop (tail calls, closures, etc.) works for module-level code. 
*/ +static JSValue native_module_run(JSContext *ctx, void *dl_handle, + cell_compiled_fn entry, int nr_slots) { void *prev_handle = g_current_dl_handle; g_current_dl_handle = dl_handle; - /* Make env available for cell_rt_get_intrinsic lookups */ - cell_rt_set_native_env(ctx, env); - - /* GC-managed frame for module execution */ - JSValue *fp = cell_rt_enter_frame(ctx, 512); - if (!fp) { + /* Create a native function object for the entry point */ + JSValue func_obj = js_new_native_function(ctx, (void *)entry, dl_handle, + (uint16_t)nr_slots, 0, JS_NULL); + if (JS_IsException(func_obj)) { g_current_dl_handle = prev_handle; - return JS_ThrowTypeError(ctx, "frame allocation failed"); + return JS_EXCEPTION; } /* Clear any stale exception left by a previous interpreted run */ if (JS_HasException(ctx)) JS_GetException(ctx); - JSValue result = fn(ctx, fp); - cell_rt_leave_frame(ctx); /* safe — closures have independent GC refs */ + JSValue result = cell_native_dispatch(ctx, func_obj, JS_NULL, 0, NULL); g_current_dl_handle = prev_handle; - if (result == JS_EXCEPTION) - return JS_EXCEPTION; return result; } +JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env) { + cell_compiled_fn fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main"); + if (!fn) + return JS_ThrowTypeError(ctx, "cell_main not found in native module dylib"); + + /* Make env available for cell_rt_get_intrinsic lookups */ + cell_rt_set_native_env(ctx, env); + + /* Try to read nr_slots from the module (exported by emitter) */ + int *slots_ptr = (int *)dlsym(dl_handle, "cell_main_nr_slots"); + int nr_slots = slots_ptr ? *slots_ptr : 512; + + return native_module_run(ctx, dl_handle, fn, nr_slots); +} + /* Load a native module from a dylib handle, trying a named symbol first. Falls back to cell_main if the named symbol is not found. 
*/ JSValue cell_rt_native_module_load_named(JSContext *ctx, void *dl_handle, const char *sym_name, JSValue env) { cell_compiled_fn fn = NULL; - if (sym_name) + const char *used_name = NULL; + if (sym_name) { fn = (cell_compiled_fn)dlsym(dl_handle, sym_name); - if (!fn) + if (fn) used_name = sym_name; + } + if (!fn) { fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main"); + used_name = "cell_main"; + } if (!fn) return JS_ThrowTypeError(ctx, "symbol not found in native module dylib"); - void *prev_handle = g_current_dl_handle; - g_current_dl_handle = dl_handle; - /* Make env available for cell_rt_get_intrinsic lookups */ cell_rt_set_native_env(ctx, env); - JSValue *fp = cell_rt_enter_frame(ctx, 512); - if (!fp) { - g_current_dl_handle = prev_handle; - return JS_ThrowTypeError(ctx, "frame allocation failed"); - } + /* Try to read nr_slots from the module */ + char slots_sym[128]; + snprintf(slots_sym, sizeof(slots_sym), "%s_nr_slots", used_name); + int *slots_ptr = (int *)dlsym(dl_handle, slots_sym); + int nr_slots = slots_ptr ? 
*slots_ptr : 512; - JSValue result = fn(ctx, fp); - cell_rt_leave_frame(ctx); /* safe — closures have independent GC refs */ - g_current_dl_handle = prev_handle; - if (result == JS_EXCEPTION) - return JS_EXCEPTION; - return result; + return native_module_run(ctx, dl_handle, fn, nr_slots); } /* Backward-compat: uses RTLD_DEFAULT (works when dylib opened with RTLD_GLOBAL) */ diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index f8487801..94e10b08 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -1322,6 +1322,7 @@ typedef enum { JS_FUNC_KIND_BYTECODE, JS_FUNC_KIND_C_DATA, JS_FUNC_KIND_REGISTER, /* register-based VM function */ + JS_FUNC_KIND_NATIVE, /* QBE-compiled native function */ } JSFunctionKind; typedef struct JSFunction { @@ -1340,6 +1341,12 @@ typedef struct JSFunction { JSValue env_record; /* stone record, module environment */ JSValue outer_frame; /* JSFrame JSValue, for closures */ } reg; + struct { + void *fn_ptr; /* compiled cell_fn_N pointer */ + void *dl_handle; /* dylib handle for dlsym lookups */ + uint16_t nr_slots; /* frame size for this function */ + JSValue outer_frame; /* GC-traced, for closures */ + } native; } u; } JSFunction; @@ -1362,6 +1369,7 @@ typedef struct JSFunction { JSValue js_call_c_function (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv); JSValue JS_CallInternal (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv, int flags); JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame); +JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv); int JS_DeleteProperty (JSContext *ctx, JSValue obj, JSValue prop); JSValue __attribute__ ((format (printf, 2, 3))) JS_ThrowInternalError (JSContext *ctx, const char *fmt, ...); @@ -1652,6 +1660,7 @@ JSValue js_key_from_string (JSContext *ctx, JSValue val); /* mach.c 
exports */ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame); +JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, uint16_t nr_slots, int arity, JSValue outer_frame); JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count); int reg_vm_check_interrupt(JSContext *ctx); diff --git a/source/runtime.c b/source/runtime.c index 08be4ee9..657fe161 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -1442,6 +1442,8 @@ void gc_scan_object (JSContext *ctx, void *ptr, uint8_t *from_base, uint8_t *fro /* Scan outer_frame and env_record */ fn->u.reg.outer_frame = gc_copy_value (ctx, fn->u.reg.outer_frame, from_base, from_end, to_base, to_free, to_end); fn->u.reg.env_record = gc_copy_value (ctx, fn->u.reg.env_record, from_base, from_end, to_base, to_free, to_end); + } else if (fn->kind == JS_FUNC_KIND_NATIVE) { + fn->u.native.outer_frame = gc_copy_value (ctx, fn->u.native.outer_frame, from_base, from_end, to_base, to_free, to_end); } break; } @@ -4732,6 +4734,8 @@ JSValue JS_CallInternal (JSContext *ctx, JSValue func_obj, JSValue this_obj, case JS_FUNC_KIND_REGISTER: return JS_CallRegisterVM (ctx, f->u.reg.code, this_obj, argc, argv, f->u.reg.env_record, f->u.reg.outer_frame); + case JS_FUNC_KIND_NATIVE: + return cell_native_dispatch (ctx, func_obj, this_obj, argc, argv); default: return JS_ThrowTypeError (ctx, "not a function"); } @@ -4753,6 +4757,8 @@ JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, J case JS_FUNC_KIND_REGISTER: return JS_CallRegisterVM (ctx, f->u.reg.code, this_obj, argc, argv, f->u.reg.env_record, f->u.reg.outer_frame); + case JS_FUNC_KIND_NATIVE: + return cell_native_dispatch (ctx, func_obj, this_obj, argc, argv); default: return JS_ThrowTypeError (ctx, "not a function"); } diff --git a/vm_suite.ce b/vm_suite.ce index c14dd11c..6cee162b 100644 --- a/vm_suite.ce +++ b/vm_suite.ce @@ -827,6 +827,27 
@@ run("disruption handler accesses object from outer scope", function() { if (obj.y != 20) fail("handler mutation lost, y=" + text(obj.y)) }) +run("disruption in callback with multiple calls after", function() { + // Regression: a function with a disruption handler that calls a + // callback which disrupts, followed by more successful calls. + // In native mode, cell_rt_disrupt must NOT use JS_ThrowTypeError + // (which prints to stderr) — it must silently set the exception. + var log = [] + var run_inner = function(name, fn) { + fn() + log[] = "pass:" + name + } disruption { + log[] = "fail:" + name + } + run_inner("a", function() { var x = 1 }) + run_inner("b", function() { disrupt }) + run_inner("c", function() { var y = 2 }) + if (length(log) != 3) fail("expected 3 log entries, got " + text(length(log))) + if (log[0] != "pass:a") fail("expected pass:a, got " + log[0]) + if (log[1] != "fail:b") fail("expected fail:b, got " + log[1]) + if (log[2] != "pass:c") fail("expected pass:c, got " + log[2]) +}) + // ============================================================================ // TYPE CHECKING WITH is_* FUNCTIONS // ============================================================================ From 4872c627048586292135809a09cddf1b0338036f Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 14:14:44 -0600 Subject: [PATCH 2/9] fix JS_ToNumber --- source/runtime.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/runtime.c b/source/runtime.c index f99557e8..b5c41bf1 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -3955,8 +3955,10 @@ JSValue JS_ToNumber (JSContext *ctx, JSValue val) { ret = val; break; case JS_TAG_BOOL: + ret = JS_NewInt32 (ctx, JS_ToBool (ctx, val)); + break; case JS_TAG_NULL: - ret = JS_NewInt32 (ctx, JS_VALUE_GET_INT (val)); + ret = JS_NewInt32 (ctx, 0); break; case JS_TAG_STRING_IMM: return JS_ThrowTypeError (ctx, "cannot convert text to a number"); From 
c33c35de875ed37b0b4bf31cd90227e8ea6ca0d0 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 16:53:33 -0600 Subject: [PATCH 3/9] aot pass all tests --- build.cm | 40 ++-- internal/os.c | 1 + qbe_emit.cm | 412 ++++++++++++++++++++++++++++++++++---- source/qbe_helpers.c | 270 ++++++++++++++++--------- source/quickjs-internal.h | 5 + source/runtime.c | 4 + 6 files changed, 583 insertions(+), 149 deletions(-) diff --git a/build.cm b/build.cm index 5d2a40fb..5d14d501 100644 --- a/build.cm +++ b/build.cm @@ -80,6 +80,17 @@ function content_hash(str) { return text(crypto.blake2(bb, 32), 'h') } +// Bump when native codegen/runtime ABI changes so stale dylibs are not reused. +def NATIVE_CACHE_VERSION = "native-v8" + +// Enable AOT ASan by creating .cell/asan_aot in the package root. +function native_sanitize_flags() { + if (fd.is_file('.cell/asan_aot')) { + return ' -fsanitize=address -fno-omit-frame-pointer' + } + return '' +} + function get_build_dir() { return shop.get_build_dir() } @@ -509,7 +520,8 @@ Build.build_static = function(packages, target, output, buildtype) { // il_parts: {data: text, functions: [text, ...]} // cc: C compiler path // tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_) -function compile_native_single(il_parts, cc, tmp_prefix) { +function compile_native_single(il_parts, cc, tmp_prefix, extra_flags) { + var _extra = extra_flags || '' var helpers_il = (il_parts.helpers && length(il_parts.helpers) > 0) ? 
text(il_parts.helpers, "\n") : "" var all_fns = text(il_parts.functions, "\n") @@ -519,7 +531,7 @@ function compile_native_single(il_parts, cc, tmp_prefix) { var o_path = tmp_prefix + '.o' var rc = null fd.slurpwrite(s_path, stone(blob(asm_text))) - rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path) + rc = os.system(cc + _extra + ' -c ' + s_path + ' -o ' + o_path) if (rc != 0) { print('Assembly failed'); disrupt } @@ -572,6 +584,8 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var tc = toolchains[_target] var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so') var cc = tc.c + var san_flags = native_sanitize_flags() + var san_suffix = length(san_flags) > 0 ? '_asan' : '' // Step 1: Compile through pipeline var optimized = shop.compile_file(src_path) @@ -586,7 +600,7 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) // Content hash for cache key - var hash = content_hash(text(fd.slurp(src_path)) + '\n' + _target + '\nnative') + var hash = content_hash(text(fd.slurp(src_path)) + '\n' + _target + '\nnative\n' + NATIVE_CACHE_VERSION + '\n' + san_flags) var build_dir = get_build_dir() ensure_dir(build_dir) @@ -596,22 +610,22 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { // Compile and assemble via batched parallel pipeline var tmp = '/tmp/cell_native_' + hash - var rt_o_path = '/tmp/cell_qbe_rt.o' + var rt_o_path = '/tmp/cell_qbe_rt' + san_suffix + '.o' - var o_paths = compile_native_single(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp, san_flags) // Compile QBE runtime stubs if needed var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' - rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') + rc = os.system(cc + san_flags + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') if (rc != 0) { print('QBE 
runtime stubs compilation failed'); disrupt } } // Link dylib - var link_cmd = cc + ' -shared -fPIC' + var link_cmd = cc + san_flags + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { @@ -658,6 +672,8 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var tc = toolchains[_target] var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so') var cc = tc.c + var san_flags = native_sanitize_flags() + var san_suffix = length(san_flags) > 0 ? '_asan' : '' var qbe_macros = use('qbe') var qbe_emit = use('qbe_emit') @@ -669,7 +685,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) var src = text(fd.slurp(src_path)) - var hash = content_hash(src + '\n' + _target + '\nnative') + var hash = content_hash(src + '\n' + _target + '\nnative\n' + NATIVE_CACHE_VERSION + '\n' + san_flags) var build_dir = get_build_dir() ensure_dir(build_dir) @@ -679,22 +695,22 @@ Build.compile_native_ir = function(optimized, src_path, opts) { // Compile and assemble via batched parallel pipeline var tmp = '/tmp/cell_native_' + hash - var rt_o_path = '/tmp/cell_qbe_rt.o' + var rt_o_path = '/tmp/cell_qbe_rt' + san_suffix + '.o' - var o_paths = compile_native_single(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp, san_flags) // Compile QBE runtime stubs if needed var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' - rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') + rc = os.system(cc + san_flags + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') if (rc != 0) { print('QBE runtime stubs compilation failed'); disrupt } } // Link dylib - var link_cmd = cc + ' -shared -fPIC' + var link_cmd = cc + san_flags + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' 
} else if (tc.system == 'linux') { diff --git a/internal/os.c b/internal/os.c index 4fa6d549..22b9acc6 100644 --- a/internal/os.c +++ b/internal/os.c @@ -306,6 +306,7 @@ static JSValue js_os_rusage(JSContext *js, JSValue self, int argc, JSValue *argv JSC_SCALL(os_system, int err = system(str); + JS_SetPauseFlag(js, 0); ret = number2js(js,err); ) diff --git a/qbe_emit.cm b/qbe_emit.cm index 23924b2c..e62b419f 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -360,10 +360,93 @@ ${sw("w", "%fp", "%dest", "%r")} // Category C: Allocating helpers (return fp or 0) // ============================================================ - // Allocating binary ops: read 2 slots, call C, refresh, write dest + // add: int fast path in-helper, slow path calls runtime + h[] = `export function l $__add_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_is_int =w ceql %a_tag, 0 + %b_is_int =w ceql %b_tag, 0 + %both_int =w and %a_is_int, %b_is_int + jnz %both_int, @int_fast, @slow +@int_fast + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %sum =l add %ai, %bi + %sumw =w copy %sum + %sumext =l extsw %sumw + %sum_ok =w ceql %sumext, %sum + jnz %sum_ok, @int_store, @slow +@int_store + %rtag =l shl %sum, 1 +${sw("w", "%fp", "%dest", "%rtag")} + ret %fp +@slow + %r =l call $cell_rt_add(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // sub: int fast path in-helper, slow path calls float helper + h[] = `export function l $__sub_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_is_int =w ceql %a_tag, 0 + %b_is_int =w ceql %b_tag, 0 + %both_int =w and %a_is_int, %b_is_int + jnz %both_int, @int_fast, @slow +@int_fast + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %diff =l sub %ai, %bi + %diffw =w copy %diff + %diffext =l extsw %diffw + %diff_ok =w ceql %diffext, %diff + jnz %diff_ok, @int_store, @slow +@int_store + %rtag =l shl %diff, 1 
+${sw("w", "%fp", "%dest", "%rtag")} + ret %fp +@slow + %r =l call $qbe_float_sub(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // mul: int fast path in-helper, slow path calls float helper + h[] = `export function l $__mul_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_is_int =w ceql %a_tag, 0 + %b_is_int =w ceql %b_tag, 0 + %both_int =w and %a_is_int, %b_is_int + jnz %both_int, @int_fast, @slow +@int_fast + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %prod =l mul %ai, %bi + %prodw =w copy %prod + %prodext =l extsw %prodw + %prod_ok =w ceql %prodext, %prod + jnz %prod_ok, @int_store, @slow +@int_store + %rtag =l shl %prod, 1 +${sw("w", "%fp", "%dest", "%rtag")} + ret %fp +@slow + %r =l call $qbe_float_mul(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // Remaining allocating binary ops: call C, refresh, write dest var ab_ops = [ - ["add", "cell_rt_add"], ["sub", "qbe_float_sub"], - ["mul", "qbe_float_mul"], ["div", "qbe_float_div"], + ["div", "qbe_float_div"], ["mod", "qbe_float_mod"], ["pow", "qbe_float_pow"], ["concat", "JS_ConcatString"] ] @@ -685,11 +768,27 @@ var qbe_emit = function(ir, qbe, export_name) { var si = 0 var scan = null var scan_op = null + var label_pos = {} + var instr_idx = 0 var has_invokes = false var seg_counter = 0 var ri = 0 var seg_num = 0 var resume_val = 0 + var j_lbl = null + var j_idx = null + var jt_lbl = null + var jt_idx = null + var jt_backedge = false + var jf_lbl = null + var jf_idx = null + var jf_backedge = false + var jn_lbl = null + var jn_idx = null + var jn_backedge = false + var jnn_lbl = null + var jnn_idx = null + var jnn_backedge = false // Pre-scan: count invoke/tail_invoke points to assign segment numbers. 
// Must skip dead code (instructions after terminators) the same way @@ -701,6 +800,7 @@ var qbe_emit = function(ir, qbe, export_name) { scan = instrs[si] si = si + 1 if (is_text(scan)) { + label_pos[sanitize(scan)] = si - 1 // Labels reset dead code state (unless they're nop pseudo-labels) if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_")) scan_dead = false @@ -709,11 +809,11 @@ var qbe_emit = function(ir, qbe, export_name) { if (scan_dead) continue if (!is_array(scan)) continue scan_op = scan[0] - if (scan_op == "invoke" || scan_op == "tail_invoke") { + if (scan_op == "invoke") { invoke_count = invoke_count + 1 } // Track terminators — same set as in the main loop - if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "disrupt") { + if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "tail_invoke" || scan_op == "disrupt") { scan_dead = true } } @@ -795,11 +895,24 @@ var qbe_emit = function(ir, qbe, export_name) { emit(`@${lbl}_ok`) } + // Poll pause/interrupt state on taken backward jumps. 
+ var emit_backedge_branch = function(target_label) { + var chk_lbl = fresh() + emit(` %${chk_lbl} =w call $cell_rt_check_backedge(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jnz %${chk_lbl}, @disruption_handler, @${target_label}`) + } else { + needs_exc_ret = true + emit(` jnz %${chk_lbl}, @_exc_ret, @${target_label}`) + } + } + // Walk instructions var last_was_term = false i = 0 while (i < length(instrs)) { instr = instrs[i] + instr_idx = i // Emit @disruption_handler at the right flat index // disruption_pc counts all entries (labels + instructions) @@ -909,18 +1022,117 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Generic arithmetic (VM dispatches int/float) --- if (op == "add") { - emit(` %fp =l call $__add_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_sum =l add %${p}_ai, %${p}_bi`) + emit(` %${p}_sumw =w copy %${p}_sum`) + emit(` %${p}_sumext =l extsw %${p}_sumw`) + emit(` %${p}_sum_ok =w ceql %${p}_sumext, %${p}_sum`) + emit(` jnz %${p}_sum_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_sum, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` %${p}_r =l call $cell_rt_add(l %ctx, l ${lhs}, l ${rhs})`) + emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) + chk = fresh() + emit(` %${chk} =w ceql %fp, 0`) + if (has_handler && !in_handler) { + emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) + } + 
emit(`@${chk}_ok`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "subtract") { - emit(` %fp =l call $__sub_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_diff =l sub %${p}_ai, %${p}_bi`) + emit(` %${p}_diffw =w copy %${p}_diff`) + emit(` %${p}_diffext =l extsw %${p}_diffw`) + emit(` %${p}_diff_ok =w ceql %${p}_diffext, %${p}_diff`) + emit(` jnz %${p}_diff_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_diff, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` %${p}_r =l call $qbe_float_sub(l %ctx, l ${lhs}, l ${rhs})`) + emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) + chk = fresh() + emit(` %${chk} =w ceql %fp, 0`) + if (has_handler && !in_handler) { + emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) + } + emit(`@${chk}_ok`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "multiply") { - emit(` %fp =l call $__mul_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + 
emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_prod =l mul %${p}_ai, %${p}_bi`) + emit(` %${p}_prodw =w copy %${p}_prod`) + emit(` %${p}_prodext =l extsw %${p}_prodw`) + emit(` %${p}_prod_ok =w ceql %${p}_prodext, %${p}_prod`) + emit(` jnz %${p}_prod_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_prod, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` %${p}_r =l call $qbe_float_mul(l %ctx, l ${lhs}, l ${rhs})`) + emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) + chk = fresh() + emit(` %${chk} =w ceql %fp, 0`) + if (has_handler && !in_handler) { + emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) + } + emit(`@${chk}_ok`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "divide") { @@ -1003,27 +1215,93 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Comparisons (int path, no GC) --- if (op == "eq_int") { - emit(` call $__eq_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w ceqw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "ne_int") { - emit(` call $__ne_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w cnew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, 
`%${p}_r`) continue } if (op == "lt_int") { - emit(` call $__lt_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csltw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "gt_int") { - emit(` call $__gt_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csgtw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "le_int") { - emit(` call $__le_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w cslew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "ge_int") { - emit(` call $__ge_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csgew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + 
emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } @@ -1240,39 +1518,99 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Control flow --- if (op == "jump") { - emit(` jmp @${sanitize(a1)}`) + j_lbl = sanitize(a1) + j_idx = label_pos[j_lbl] + if (j_idx != null && j_idx < instr_idx) { + emit_backedge_branch(j_lbl) + } else { + emit(` jmp @${j_lbl}`) + } last_was_term = true continue } if (op == "jump_true") { v = s_read(a1) p = fresh() - emit(` %${p} =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`) + jt_lbl = sanitize(a2) + jt_idx = label_pos[jt_lbl] + jt_backedge = jt_idx != null && jt_idx < instr_idx + emit(` %${p}_is_true =w ceql ${v}, ${text(qbe.js_true)}`) + emit(` jnz %${p}_is_true, @${p}_take, @${p}_chk_fast`) + emit(`@${p}_chk_fast`) + emit(` %${p}_tag =l and ${v}, 31`) + emit(` %${p}_is_bool =w ceql %${p}_tag, 3`) + emit(` %${p}_is_null =w ceql %${p}_tag, 7`) + emit(` %${p}_is_falsey =w or %${p}_is_bool, %${p}_is_null`) + emit(` jnz %${p}_is_falsey, @${p}_f, @${p}_tb`) + emit(`@${p}_tb`) + emit(` %${p}_tbv =w call $JS_ToBool(l %ctx, l ${v})`) + emit(` jnz %${p}_tbv, @${p}_take, @${p}_f`) + emit(`@${p}_take`) + if (jt_backedge) { + emit_backedge_branch(jt_lbl) + } else { + emit(` jmp @${jt_lbl}`) + } emit(`@${p}_f`) continue } if (op == "jump_false") { v = s_read(a1) p = fresh() - emit(` %${p} =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`) + jf_lbl = sanitize(a2) + jf_idx = label_pos[jf_lbl] + jf_backedge = jf_idx != null && jf_idx < instr_idx + emit(` %${p}_is_true =w ceql ${v}, ${text(qbe.js_true)}`) + emit(` jnz %${p}_is_true, @${p}_t, @${p}_chk_fast`) + emit(`@${p}_chk_fast`) + emit(` %${p}_tag =l and ${v}, 31`) + emit(` %${p}_is_bool =w ceql %${p}_tag, 3`) + emit(` %${p}_is_null =w ceql %${p}_tag, 7`) + emit(` %${p}_is_fast_false =w or %${p}_is_bool, %${p}_is_null`) + emit(` jnz %${p}_is_fast_false, @${p}_take, @${p}_tb`) + emit(`@${p}_tb`) + emit(` 
%${p}_tbv =w call $JS_ToBool(l %ctx, l ${v})`) + emit(` jnz %${p}_tbv, @${p}_t, @${p}_take`) + emit(`@${p}_take`) + if (jf_backedge) { + emit_backedge_branch(jf_lbl) + } else { + emit(` jmp @${jf_lbl}`) + } emit(`@${p}_t`) continue } if (op == "jump_null") { v = s_read(a1) p = fresh() + jn_lbl = sanitize(a2) + jn_idx = label_pos[jn_lbl] + jn_backedge = jn_idx != null && jn_idx < instr_idx emit(` %${p} =w ceql ${v}, ${text(qbe.js_null)}`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_nn`) + if (jn_backedge) { + emit(` jnz %${p}, @${p}_bn, @${p}_nn`) + emit(`@${p}_bn`) + emit_backedge_branch(jn_lbl) + } else { + emit(` jnz %${p}, @${jn_lbl}, @${p}_nn`) + } emit(`@${p}_nn`) continue } if (op == "jump_not_null") { v = s_read(a1) p = fresh() + jnn_lbl = sanitize(a2) + jnn_idx = label_pos[jnn_lbl] + jnn_backedge = jnn_idx != null && jnn_idx < instr_idx emit(` %${p} =w cnel ${v}, ${text(qbe.js_null)}`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_n`) + if (jnn_backedge) { + emit(` jnz %${p}, @${p}_bn, @${p}_n`) + emit(`@${p}_bn`) + emit_backedge_branch(jnn_lbl) + } else { + emit(` jnz %${p}, @${jnn_lbl}, @${p}_n`) + } emit(`@${p}_n`) continue } @@ -1316,26 +1654,14 @@ var qbe_emit = function(ir, qbe, export_name) { continue } if (op == "tail_invoke") { - // Same as invoke — dispatch loop regular call with resume - seg_counter = seg_counter + 1 - seg_num = seg_counter - resume_val = seg_num * 65536 + a2 - emit(` %_tinv_addr${text(seg_num)} =l sub %fp, 8`) - emit(` storel ${text(resume_val * 2)}, %_tinv_addr${text(seg_num)}`) - emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) - emit(" ret 0") - emit(`@_seg${text(seg_num)}`) - // Check for exception after dispatch loop resumes us + // Tail call: hand control to dispatch loop and do not resume this segment. + // Use 0xFFFF as ret_slot (no result writeback into current frame). 
p = fresh() - emit(` %${p} =w call $JS_HasException(l %ctx)`) - if (has_handler && !in_handler) { - emit(` jnz %${p}, @disruption_handler, @${p}_ok`) - } else { - needs_exc_ret = true - emit(` jnz %${p}, @_exc_ret, @${p}_ok`) - } - emit(`@${p}_ok`) - last_was_term = false + emit(` %${p}_addr =l sub %fp, 8`) + emit(` storel ${text(65535 * 2)}, %${p}_addr`) + emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) + emit(" ret 0") + last_was_term = true continue } if (op == "goframe") { diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 547f3d2f..884f4cc6 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -9,6 +9,14 @@ #include "quickjs-internal.h" #include #include +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#define CELL_THREAD_LOCAL __thread +#else +#define CELL_THREAD_LOCAL _Thread_local +#endif /* Non-inline wrappers for static inline functions in quickjs.h */ JSValue qbe_new_float64(JSContext *ctx, double d) { @@ -278,8 +286,8 @@ void cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, /* Native module environment — set before executing a native module's cell_main. Contains runtime functions (starts_with, ends_with, etc.) and use(). */ -static JSGCRef g_native_env_ref; -static int g_has_native_env = 0; +static CELL_THREAD_LOCAL JSGCRef g_native_env_ref; +static CELL_THREAD_LOCAL int g_has_native_env = 0; void cell_rt_set_native_env(JSContext *ctx, JSValue env) { if (!JS_IsNull(env) && !JS_IsStone(env)) { @@ -370,18 +378,58 @@ void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth, update the current frame pointer when it moves objects. cell_rt_refresh_fp re-derives the slot pointer after any GC call. */ -#define MAX_AOT_DEPTH 8192 -static JSGCRef g_aot_gc_refs[MAX_AOT_DEPTH]; -static int g_aot_depth = 0; +// Keep GC roots for native frames in stable heap chunks (no fixed depth cap). 
+#define AOT_GC_REF_CHUNK_SIZE 1024 +typedef struct AOTGCRefChunk { + JSGCRef refs[AOT_GC_REF_CHUNK_SIZE]; +} AOTGCRefChunk; + +static CELL_THREAD_LOCAL AOTGCRefChunk **g_aot_gc_ref_chunks = NULL; +static CELL_THREAD_LOCAL int g_aot_gc_ref_chunk_count = 0; +static CELL_THREAD_LOCAL int g_aot_depth = 0; + +int cell_rt_native_active(void) { + return g_aot_depth > 0; +} + +static int ensure_aot_gc_ref_slot(JSContext *ctx, int depth_index) { + if (depth_index < 0) + return 0; + int needed_chunks = (depth_index / AOT_GC_REF_CHUNK_SIZE) + 1; + if (needed_chunks <= g_aot_gc_ref_chunk_count) + return 1; + AOTGCRefChunk **new_chunks = + (AOTGCRefChunk **)realloc(g_aot_gc_ref_chunks, + (size_t)needed_chunks * sizeof(*new_chunks)); + if (!new_chunks) { + JS_ThrowOutOfMemory(ctx); + return 0; + } + g_aot_gc_ref_chunks = new_chunks; + for (int i = g_aot_gc_ref_chunk_count; i < needed_chunks; i++) { + g_aot_gc_ref_chunks[i] = (AOTGCRefChunk *)calloc(1, sizeof(AOTGCRefChunk)); + if (!g_aot_gc_ref_chunks[i]) { + JS_ThrowOutOfMemory(ctx); + return 0; + } + } + g_aot_gc_ref_chunk_count = needed_chunks; + return 1; +} + +static inline JSGCRef *aot_gc_ref_at(int depth_index) { + int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; + int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; + return &g_aot_gc_ref_chunks[chunk_index]->refs[slot_index]; +} JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { - if (g_aot_depth >= MAX_AOT_DEPTH) { - JS_ThrowTypeError(ctx, "native call stack overflow (depth %d)", g_aot_depth); + if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) { return NULL; } JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots); if (!frame) return NULL; - JSGCRef *ref = &g_aot_gc_refs[g_aot_depth]; + JSGCRef *ref = aot_gc_ref_at(g_aot_depth); JS_AddGCRef(ctx, ref); ref->val = JS_MKPTR(frame); g_aot_depth++; @@ -394,7 +442,7 @@ JSValue *cell_rt_refresh_fp(JSContext *ctx) { fprintf(stderr, "[BUG] cell_rt_refresh_fp: g_aot_depth=%d\n", g_aot_depth); 
abort(); } - JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; + JSValue val = aot_gc_ref_at(g_aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp: frame is NULL at depth=%d val=%lld\n", @@ -412,7 +460,7 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: g_aot_depth=%d\n", g_aot_depth); abort(); } - JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; + JSValue val = aot_gc_ref_at(g_aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: frame is NULL\n"); @@ -422,8 +470,12 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { } void cell_rt_leave_frame(JSContext *ctx) { + if (g_aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_leave_frame underflow\n"); + abort(); + } g_aot_depth--; - JS_DeleteGCRef(ctx, &g_aot_gc_refs[g_aot_depth]); + JS_DeleteGCRef(ctx, aot_gc_ref_at(g_aot_depth)); } /* --- Function creation and calling --- */ @@ -432,7 +484,7 @@ typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); /* Set before executing a native module's cell_main — used by cell_rt_make_function to resolve fn_ptr via dlsym */ -static void *g_current_dl_handle = NULL; +static CELL_THREAD_LOCAL void *g_current_dl_handle = NULL; /* ============================================================ Dispatch loop — the core of native function execution. @@ -442,8 +494,20 @@ static void *g_current_dl_handle = NULL; /* Pending call state — set by cell_rt_signal_call / cell_rt_signal_tail_call, read by the dispatch loop. */ -static JSValue g_pending_callee_frame = 0; /* JSFrameRegister ptr */ -static int g_pending_is_tail = 0; +static CELL_THREAD_LOCAL JSValue g_pending_callee_frame = 0; /* JSFrameRegister ptr */ +static CELL_THREAD_LOCAL int g_pending_is_tail = 0; + +/* Poll pause state on taken backward jumps (AOT backedges). 
+ MACH can suspend/resume a register VM frame at pc granularity; native AOT + does not currently have an equivalent resume point, so we acknowledge timer + pauses by clearing pause_flag and continuing the current turn. */ +int cell_rt_check_backedge(JSContext *ctx) { + int pf = atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed); + if (pf >= 1) { + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); + } + return 0; +} void cell_rt_signal_call(JSContext *ctx, void *fp, int64_t frame_slot) { (void)ctx; @@ -467,6 +531,15 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, cell_compiled_fn fn = (cell_compiled_fn)f->u.native.fn_ptr; int nr_slots = f->u.native.nr_slots; int arity = f->length; + void *prev_dl_handle = g_current_dl_handle; + g_current_dl_handle = f->u.native.dl_handle; + +#define RETURN_DISPATCH(v) \ + do { \ + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); \ + g_current_dl_handle = prev_dl_handle; \ + return (v); \ + } while (0) /* Root func_obj across allocation — GC can move it */ JSGCRef func_ref; @@ -477,7 +550,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSValue *fp = cell_rt_enter_frame(ctx, nr_slots); if (!fp) { JS_PopGCRef(ctx, &func_ref); - return JS_EXCEPTION; + RETURN_DISPATCH(JS_EXCEPTION); } /* Re-derive func_obj after potential GC */ @@ -499,11 +572,25 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, for (;;) { g_pending_callee_frame = 0; + g_pending_is_tail = 0; + if (atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed) >= 1) + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); + + /* Keep closure creation bound to the currently executing native module. 
*/ + if (JS_IsFunction(frame->function)) { + JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); + if (cur_fn->kind == JS_FUNC_KIND_NATIVE) + g_current_dl_handle = cur_fn->u.native.dl_handle; + } JSValue result = fn(ctx, fp); /* Re-derive frame after potential GC */ - JSValue frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + if (g_aot_depth <= 0) { + fprintf(stderr, "[BUG] native dispatch lost frame depth after fn call\n"); + abort(); + } + JSValue frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; @@ -511,7 +598,12 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* Function signaled a call — dispatch it */ JSValue callee_frame_val = g_pending_callee_frame; g_pending_callee_frame = 0; - JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_frame_val); + int pending_is_tail = g_pending_is_tail; + g_pending_is_tail = 0; + JSGCRef callee_ref; + JS_PushGCRef(ctx, &callee_ref); + callee_ref.val = callee_frame_val; + JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); int callee_argc = (int)objhdr_cap56(callee_fr->header); callee_argc = (callee_argc >= 2) ? 
callee_argc - 2 : 0; JSValue callee_fn_val = callee_fr->function; @@ -521,95 +613,86 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* Resume caller with exception pending */ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_ref); continue; } - JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(callee_fn_val); + JSGCRef callee_fn_ref; + JS_PushGCRef(ctx, &callee_fn_ref); + callee_fn_ref.val = callee_fn_val; + JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(callee_fn_ref.val); if (callee_fn->kind == JS_FUNC_KIND_NATIVE) { /* Native-to-native call — no C stack growth */ cell_compiled_fn callee_ptr = (cell_compiled_fn)callee_fn->u.native.fn_ptr; int callee_slots = callee_fn->u.native.nr_slots; - if (g_pending_is_tail) { - /* Tail call: reuse or replace current frame */ - if (callee_slots <= (int)objhdr_cap56(frame->header)) { - /* Reuse current frame */ - int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; - if (cc < 0) cc = callee_argc; - frame->slots[0] = callee_fr->slots[0]; /* this */ - for (int i = 0; i < cc && i < callee_slots - 1; i++) - frame->slots[1 + i] = callee_fr->slots[1 + i]; - /* Null out remaining slots */ - int cur_slots = (int)objhdr_cap56(frame->header); - for (int i = 1 + cc; i < cur_slots; i++) - frame->slots[i] = JS_NULL; - frame->function = callee_fn_val; - frame->address = JS_NewInt32(ctx, 0); - fn = callee_ptr; - /* fp stays the same (same frame) */ - } else { - /* Need bigger frame — save callee info, pop+push */ - JSValue saved_caller = frame->caller; - JSValue callee_this = callee_fr->slots[0]; - int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; - if (cc < 0) cc = callee_argc; - JSValue callee_args[cc > 0 ? cc : 1]; - for (int i = 0; i < cc; i++) - callee_args[i] = callee_fr->slots[1 + i]; + if (pending_is_tail) { + /* Tail call: replace frame instead of mutating in place. 
+ In-place reuse breaks closures that captured the caller frame. */ + JSValue saved_caller = frame->caller; + int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; + if (cc < 0) cc = callee_argc; - /* Pop old frame */ - cell_rt_leave_frame(ctx); + /* Pop old frame */ + cell_rt_leave_frame(ctx); - /* Push new right-sized frame */ - JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); - if (!new_fp) - return JS_EXCEPTION; - JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); - new_frame->function = callee_fn_val; - new_frame->caller = saved_caller; - new_frame->slots[0] = callee_this; - for (int i = 0; i < cc && i < callee_slots - 1; i++) - new_frame->slots[1 + i] = callee_args[i]; - frame = new_frame; - fp = new_fp; - fn = callee_ptr; + /* Push new right-sized frame */ + JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); + if (!new_fp) { + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(JS_EXCEPTION); } + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); + callee_fn_val = callee_fn_ref.val; + new_frame->function = callee_fn_val; + new_frame->caller = saved_caller; + new_frame->slots[0] = callee_fr->slots[0]; + for (int i = 0; i < cc && i < callee_slots - 1; i++) + new_frame->slots[1 + i] = callee_fr->slots[1 + i]; + frame = new_frame; + fp = new_fp; + fn = callee_ptr; } else { /* Regular call: push new frame, link caller */ int ret_info = JS_VALUE_GET_INT(frame->address); int resume_seg = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; - /* Save callee info before allocation */ - JSValue callee_this = callee_fr->slots[0]; int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; if (cc < 0) cc = callee_argc; - JSValue callee_args[cc > 0 ? 
cc : 1]; - for (int i = 0; i < cc; i++) - callee_args[i] = callee_fr->slots[1 + i]; JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); if (!new_fp) { /* Resume caller with exception pending */ - frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); continue; } + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); /* Re-derive caller frame after alloc */ - frame_val = g_aot_gc_refs[g_aot_depth - 2].val; + if (g_aot_depth <= 1) { + fprintf(stderr, "[BUG] native dispatch bad depth while linking caller: %d\n", g_aot_depth); + abort(); + } + frame_val = aot_gc_ref_at(g_aot_depth - 2)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); + callee_fn_val = callee_fn_ref.val; new_frame->function = callee_fn_val; new_frame->caller = JS_MKPTR(frame); - new_frame->slots[0] = callee_this; + new_frame->slots[0] = callee_fr->slots[0]; for (int i = 0; i < cc && i < callee_slots - 1; i++) - new_frame->slots[1 + i] = callee_args[i]; + new_frame->slots[1 + i] = callee_fr->slots[1 + i]; /* Save return address in caller */ frame->address = JS_NewInt32(ctx, (resume_seg << 16) | ret_slot); @@ -630,7 +713,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, callee_argc, &callee_fr->slots[1], 0); /* Re-derive frame after call */ - frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; @@ -643,28 +726,29 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, Just resume it — it will detect the exception. 
*/ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_ref); continue; } /* Clear stale exception */ if (JS_HasException(ctx)) JS_GetException(ctx); - if (g_pending_is_tail) { + if (pending_is_tail) { /* Tail call to non-native: return its result up the chain */ /* Pop current frame and return to caller */ if (g_aot_depth <= base_depth) { cell_rt_leave_frame(ctx); - return ret; + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(ret); } /* Pop current frame, return to caller frame */ - JSValue caller_val = frame->caller; cell_rt_leave_frame(ctx); - if (JS_IsNull(caller_val) || g_aot_depth < base_depth) { - return ret; + if (g_aot_depth < base_depth) { + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(ret); } - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(caller_val); - /* Update GC ref to point to caller */ - g_aot_gc_refs[g_aot_depth - 1].val = caller_val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); int ret_slot = ret_info & 0xFFFF; @@ -684,6 +768,8 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fn = (cell_compiled_fn)cur_fn->u.native.fn_ptr; } } + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); continue; } @@ -696,19 +782,16 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (g_aot_depth <= base_depth) { cell_rt_leave_frame(ctx); - return JS_EXCEPTION; + RETURN_DISPATCH(JS_EXCEPTION); } - - JSValue exc_caller_val = frame->caller; cell_rt_leave_frame(ctx); - - if (JS_IsNull(exc_caller_val) || g_aot_depth < base_depth) { - return JS_EXCEPTION; + if (g_aot_depth < base_depth) { + RETURN_DISPATCH(JS_EXCEPTION); } /* Resume caller — it will check JS_HasException and branch to handler */ - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(exc_caller_val); - g_aot_gc_refs[g_aot_depth - 1].val = 
exc_caller_val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; JSFunction *exc_caller_fn = JS_VALUE_GET_FUNCTION(frame->function); @@ -719,19 +802,16 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* Normal return — pop frame and store result in caller */ if (g_aot_depth <= base_depth) { cell_rt_leave_frame(ctx); - return result; + RETURN_DISPATCH(result); } - - JSValue caller_val = frame->caller; cell_rt_leave_frame(ctx); - - if (JS_IsNull(caller_val) || g_aot_depth < base_depth) { - return result; + if (g_aot_depth < base_depth) { + RETURN_DISPATCH(result); } /* Return to caller frame */ - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(caller_val); - g_aot_gc_refs[g_aot_depth - 1].val = caller_val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); int ret_slot = ret_info & 0xFFFF; @@ -742,6 +822,8 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fn = (cell_compiled_fn)caller_fn->u.native.fn_ptr; continue; } + +#undef RETURN_DISPATCH } /* Create a native function object from a compiled fn_idx. 
@@ -761,7 +843,7 @@ JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp, /* Get the current frame as outer_frame for closures */ JSValue outer_frame = JS_NULL; if (g_aot_depth > 0) - outer_frame = g_aot_gc_refs[g_aot_depth - 1].val; + outer_frame = aot_gc_ref_at(g_aot_depth - 1)->val; return js_new_native_function(ctx, fn_ptr, g_current_dl_handle, (uint16_t)nr_slots, (int)nr_args, outer_frame); diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index cc70c859..ad5ffcf7 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -1545,9 +1545,14 @@ static inline void set_value (JSContext *ctx, JSValue *pval, JSValue new_val) { } void JS_ThrowInterrupted (JSContext *ctx); +int cell_rt_native_active(void); static inline __exception int js_poll_interrupts (JSContext *ctx) { if (unlikely (atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2)) { + if (cell_rt_native_active ()) { + atomic_store_explicit (&ctx->pause_flag, 0, memory_order_relaxed); + return 0; + } JS_ThrowInterrupted (ctx); return -1; } diff --git a/source/runtime.c b/source/runtime.c index e378e0c2..72f706e9 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -5366,6 +5366,10 @@ JSValue js_regexp_toString (JSContext *ctx, JSValue this_val, int argc, JSValue int lre_check_timeout (void *opaque) { JSContext *ctx = opaque; + if (cell_rt_native_active ()) { + atomic_store_explicit (&ctx->pause_flag, 0, memory_order_relaxed); + return 0; + } return atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2; } From 621da78de9958afd38a9c3feadf31094876386e0 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 20:24:12 -0600 Subject: [PATCH 4/9] faster aot --- build.cm | 2 +- qbe.cm | 6 +- qbe_emit.cm | 1005 ++++++++++++++++++++++++++++++++---------- source/qbe_helpers.c | 363 +++++++++++---- 4 files changed, 1065 insertions(+), 311 deletions(-) diff --git a/build.cm b/build.cm index 5d14d501..b181cc8e 100644 --- 
a/build.cm +++ b/build.cm @@ -81,7 +81,7 @@ function content_hash(str) { } // Bump when native codegen/runtime ABI changes so stale dylibs are not reused. -def NATIVE_CACHE_VERSION = "native-v8" +def NATIVE_CACHE_VERSION = "native-v16" // Enable AOT ASan by creating .cell/asan_aot in the package root. function native_sanitize_flags() { diff --git a/qbe.cm b/qbe.cm index 2424b566..25e1e53c 100644 --- a/qbe.cm +++ b/qbe.cm @@ -11,7 +11,7 @@ def js_null = 7 def js_false = 3 def js_true = 35 def js_exception = 15 -def js_empty_text = 27 +def js_empty_text = 11 // Shared closure vars for functions with >4 params var _qop = null @@ -67,13 +67,13 @@ var is_ptr = function(p, v) { var is_imm_text = function(p, v) { return ` %${p}.t =l and ${v}, 31 - %${p} =w ceql %${p}.t, 27 + %${p} =w ceql %${p}.t, 11 ` } var is_text = function(p, v) { return ` %${p}.imm =l and ${v}, 31 - %${p}.is_imm =w ceql %${p}.imm, 27 + %${p}.is_imm =w ceql %${p}.imm, 11 jnz %${p}.is_imm, @${p}.yes, @${p}.chk_ptr @${p}.chk_ptr %${p}.ptag =l and ${v}, 7 diff --git a/qbe_emit.cm b/qbe_emit.cm index e62b419f..43674cd6 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -182,10 +182,6 @@ ${sw("w", "%fp", "%dest", "%r")} // Type checks via C (no ctx needed except is_proxy) var tc_ops = [ - ["is_text", "JS_IsText", false], - ["is_array", "JS_IsArray", false], - ["is_func", "JS_IsFunction", false], - ["is_record", "JS_IsRecord", false], ["is_stone", "JS_IsStone", false], ["is_proxy", "cell_rt_is_proxy", true] ] @@ -223,10 +219,141 @@ ${sw("w", "%fp", "%dest", "%r")} i = i + 1 } - // Float comparisons: call qbe_float_cmp(ctx, op_id, a, b) → w, tag + // is_text: immediate text OR ptr->header type check (OBJ_TEXT=2), chase forwards + h[] = `export function $__is_text_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %imm =l and %a, 31 + %is_imm =w ceql %imm, 11 + jnz %is_imm, @yes, @chk_ptr +@chk_ptr + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l 
loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 2 + jmp @pack +@yes + %cr =w copy 1 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // is_record: pointer + header type check (OBJ_RECORD=3), chase forwards + h[] = `export function $__is_record_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 3 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // is_array: inline pointer+header check (OBJ_ARRAY=0), chase forwards + h[] = `export function $__is_array_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 0 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // is_func: inline pointer+header check (OBJ_FUNCTION=4), chase forwards + h[] = `export function $__is_func_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 
+ %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 4 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // Float comparisons: decode short-float/int inline, then compare in QBE. var fc_ops = [ - ["eq_float", 0], ["ne_float", 1], ["lt_float", 2], - ["le_float", 3], ["gt_float", 4], ["ge_float", 5] + ["eq_float", "ceqd"], ["ne_float", "cned"], ["lt_float", "cltd"], + ["le_float", "cled"], ["gt_float", "cgtd"], ["ge_float", "cged"] ] i = 0 while (i < length(fc_ops)) { @@ -234,7 +361,63 @@ ${sw("w", "%fp", "%dest", "%r")} @entry ${sr("a", "%s1")} ${sr("b", "%s2")} - %cr =w call $qbe_float_cmp(l %ctx, w ${fc_ops[i][1]}, l %a, l %b) + %a_tag =l and %a, 1 + %a_is_int =w ceql %a_tag, 0 + jnz %a_is_int, @a_int, @a_float +@a_int + %a_isl =l sar %a, 1 + %a_iw =w copy %a_isl + %ad =d swtof %a_iw + jmp @a_done +@a_float + %a_sexp =l shr %a, 55 + %a_sexp =l and %a_sexp, 255 + %a_is_zero =w ceql %a_sexp, 0 + jnz %a_is_zero, @a_zero, @a_decode +@a_zero + %ad =d copy d_0.0 + jmp @a_done +@a_decode + %a_sign =l shr %a, 63 + %a_mant =l shr %a, 3 + %a_mant =l and %a_mant, 4503599627370495 + %a_dexp =l sub %a_sexp, 127 + %a_dexp =l add %a_dexp, 1023 + %a_s63 =l shl %a_sign, 63 + %a_e52 =l shl %a_dexp, 52 + %a_bits =l or %a_s63, %a_e52 + %a_bits =l or %a_bits, %a_mant + %ad =d cast %a_bits +@a_done + %b_tag =l and %b, 1 + %b_is_int =w ceql %b_tag, 0 + jnz %b_is_int, @b_int, @b_float +@b_int + %b_isl =l sar %b, 1 + %b_iw =w copy %b_isl + %bd =d swtof %b_iw + jmp @b_done +@b_float + %b_sexp =l shr %b, 55 + %b_sexp =l and %b_sexp, 255 + %b_is_zero =w ceql %b_sexp, 0 + jnz %b_is_zero, @b_zero, @b_decode +@b_zero + %bd =d copy d_0.0 + jmp @b_done +@b_decode + %b_sign =l shr %b, 63 + %b_mant =l shr %b, 3 + %b_mant =l and %b_mant, 4503599627370495 + %b_dexp =l sub %b_sexp, 127 + %b_dexp =l add %b_dexp, 1023 + %b_s63 =l shl %b_sign, 63 + %b_e52 =l shl %b_dexp, 52 + %b_bits =l or %b_s63, 
%b_e52 + %b_bits =l or %b_bits, %b_mant + %bd =d cast %b_bits +@b_done + %cr =w ${fc_ops[i][1]} %ad, %bd %crext =l extuw %cr %sh =l shl %crext, 5 %r =l or %sh, 3 @@ -296,12 +479,72 @@ ${sw("w", "%fp", "%dest", "%r")} i = i + 1 } - // not: JS_ToBool + negate + tag + // not: inline truthiness (no JS_ToBool call) h[] = `export function $__not_ss(l %ctx, l %fp, l %dest, l %src) { @entry ${sr("a", "%src")} - %bval =w call $JS_ToBool(l %ctx, l %a) - %neg =w ceqw %bval, 0 + %t5 =l and %a, 31 + %is_bool =w ceql %t5, 3 + jnz %is_bool, @bool, @chk_null +@bool + %truthy =w cnel %a, 3 + jmp @truthy_done +@chk_null + %is_null =w ceql %t5, 7 + jnz %is_null, @falsey, @chk_int +@chk_int + %t1 =l and %a, 1 + %is_int =w ceql %t1, 0 + jnz %is_int, @int_path, @chk_imm_text +@int_path + %truthy =w cnel %a, 0 + jmp @truthy_done +@chk_imm_text + %is_imm_text =w ceql %t5, 11 + jnz %is_imm_text, @imm_text, @chk_ptr +@imm_text + %truthy =w cnel %a, 11 + jmp @truthy_done +@chk_ptr + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr_path, @chk_sfloat +@chk_sfloat + %is_sfloat =w ceql %ptag, 5 + jnz %is_sfloat, @sfloat_path, @other_imm +@sfloat_path + %sexp =l shr %a, 55 + %sexp =l and %sexp, 255 + %truthy =w cnel %sexp, 0 + jmp @truthy_done +@other_imm + %truthy =w copy 1 + jmp @truthy_done +@ptr_path + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk_text_ptr +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk_text_ptr + %is_text_ptr =w ceql %ht, 2 + jnz %is_text_ptr, @text_ptr, @ptr_truthy +@text_ptr + %len =l shr %hdr, 8 + %truthy =w cnel %len, 0 + jmp @truthy_done +@ptr_truthy + %truthy =w copy 1 + jmp @truthy_done +@falsey + %truthy =w copy 0 +@truthy_done + %neg =w ceqw %truthy, 0 %nex =l extuw %neg %sh =l shl %nex, 5 %r =l or %sh, 3 @@ -332,150 +575,214 @@ ${sw("w", "%fp", "%dest", "%r")} h[] = `export function $__bnot_ss(l %ctx, l %fp, l %dest, l %src) { @entry ${sr("a", 
"%src")} - %r =l call $qbe_bnot(l %ctx, l %a) + %tag =l and %a, 1 + %is_int =w ceql %tag, 0 + jnz %is_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %aiw =w copy %ai + %rw =w xor %aiw, -1 + %rl =l extsw %rw + %r =l shl %rl, 1 ${sw("w", "%fp", "%dest", "%r")} ret +@bad + call $cell_rt_disrupt(l %ctx) + ret }` - // Bitwise binary ops - var bw_ops = [ - ["band", "qbe_bitwise_and"], ["bor", "qbe_bitwise_or"], - ["bxor", "qbe_bitwise_xor"], ["bshl", "qbe_shift_shl"], - ["bshr", "qbe_shift_sar"], ["bushr", "qbe_shift_shr"] - ] - i = 0 - while (i < length(bw_ops)) { - h[] = `export function $__${bw_ops[i][0]}_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { + // Bitwise binary ops (int-only; type checks should be inserted upstream) + h[] = `export function $__band_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { @entry ${sr("a", "%s1")} ${sr("b", "%s2")} - %r =l call $${bw_ops[i][1]}(l %ctx, l %a, l %b) + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %rw =w and %aiw, %biw + %rl =l extsw %rw + %r =l shl %rl, 1 ${sw("w", "%fp", "%dest", "%r")} ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bor_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %rw =w or %aiw, %biw + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bxor_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w 
ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %rw =w xor %aiw, %biw + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bshl_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %sh =w and %biw, 31 + %rw =w shl %aiw, %sh + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bshr_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %sh =w and %biw, 31 + %rw =w sar %aiw, %sh + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bushr_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %sh =w and %biw, 31 + %rw =w shr %aiw, %sh + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret }` - i = i + 1 - } // ============================================================ // 
Category C: Allocating helpers (return fp or 0) // ============================================================ - // add: int fast path in-helper, slow path calls runtime - h[] = `export function l $__add_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { + // concat allocates; keep refresh path + h[] = `export function l $__concat_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { @entry ${sr("a", "%s1")} ${sr("b", "%s2")} - %a_tag =l and %a, 1 - %b_tag =l and %b, 1 - %a_is_int =w ceql %a_tag, 0 - %b_is_int =w ceql %b_tag, 0 - %both_int =w and %a_is_int, %b_is_int - jnz %both_int, @int_fast, @slow -@int_fast - %ai =l sar %a, 1 - %bi =l sar %b, 1 - %sum =l add %ai, %bi - %sumw =w copy %sum - %sumext =l extsw %sumw - %sum_ok =w ceql %sumext, %sum - jnz %sum_ok, @int_store, @slow -@int_store - %rtag =l shl %sum, 1 -${sw("w", "%fp", "%dest", "%rtag")} + %r =l call $JS_ConcatString(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // access_lit(ctx, fp, dest, lit_idx) + h[] = `export function l $__access_lit_ss(l %ctx, l %fp, l %dest, l %lit_idx) { +@entry + %r =l call $cell_rt_access_lit(l %ctx, l %lit_idx) + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} ret %fp -@slow - %r =l call $cell_rt_add(l %ctx, l %a, l %b) -${alloc_tail("%r")} +@exc + ret 0 }` - // sub: int fast path in-helper, slow path calls float helper - h[] = `export function l $__sub_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { -@entry -${sr("a", "%s1")} -${sr("b", "%s2")} - %a_tag =l and %a, 1 - %b_tag =l and %b, 1 - %a_is_int =w ceql %a_tag, 0 - %b_is_int =w ceql %b_tag, 0 - %both_int =w and %a_is_int, %b_is_int - jnz %both_int, @int_fast, @slow -@int_fast - %ai =l sar %a, 1 - %bi =l sar %b, 1 - %diff =l sub %ai, %bi - %diffw =w copy %diff - %diffext =l extsw %diffw - %diff_ok =w ceql %diffext, %diff - jnz %diff_ok, @int_store, @slow -@int_store - %rtag =l shl %diff, 1 -${sw("w", "%fp", "%dest", "%rtag")} - ret %fp -@slow - %r =l call $qbe_float_sub(l %ctx, l %a, l %b) 
-${alloc_tail("%r")} -}` - - // mul: int fast path in-helper, slow path calls float helper - h[] = `export function l $__mul_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { -@entry -${sr("a", "%s1")} -${sr("b", "%s2")} - %a_tag =l and %a, 1 - %b_tag =l and %b, 1 - %a_is_int =w ceql %a_tag, 0 - %b_is_int =w ceql %b_tag, 0 - %both_int =w and %a_is_int, %b_is_int - jnz %both_int, @int_fast, @slow -@int_fast - %ai =l sar %a, 1 - %bi =l sar %b, 1 - %prod =l mul %ai, %bi - %prodw =w copy %prod - %prodext =l extsw %prodw - %prod_ok =w ceql %prodext, %prod - jnz %prod_ok, @int_store, @slow -@int_store - %rtag =l shl %prod, 1 -${sw("w", "%fp", "%dest", "%rtag")} - ret %fp -@slow - %r =l call $qbe_float_mul(l %ctx, l %a, l %b) -${alloc_tail("%r")} -}` - - // Remaining allocating binary ops: call C, refresh, write dest - var ab_ops = [ - ["div", "qbe_float_div"], - ["mod", "qbe_float_mod"], ["pow", "qbe_float_pow"], - ["concat", "JS_ConcatString"] - ] - i = 0 - while (i < length(ab_ops)) { - h[] = `export function l $__${ab_ops[i][0]}_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { -@entry -${sr("a", "%s1")} -${sr("b", "%s2")} - %r =l call $${ab_ops[i][1]}(l %ctx, l %a, l %b) -${alloc_tail("%r")} -}` - i = i + 1 - } - - // Allocating unary: negate - h[] = `export function l $__neg_ss(l %ctx, l %fp, l %dest, l %src) { -@entry -${sr("a", "%src")} - %r =l call $qbe_float_neg(l %ctx, l %a) -${alloc_tail("%r")} -}` - - // Property access: load_field(ctx, fp, dest, obj_slot, name_ptr) - h[] = `export function l $__load_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %name) { + // Property access: load_field(ctx, fp, dest, obj_slot, lit_idx) + h[] = `export function l $__load_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %lit_idx) { @entry ${sr("a", "%obj_slot")} - %r =l call $cell_rt_load_field(l %ctx, l %a, l %name) -${alloc_tail("%r")} + %r =l call $cell_rt_load_field_lit(l %ctx, l %a, l %lit_idx) + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + 
ret %fp +@exc + ret 0 }` // load_dynamic(ctx, fp, dest, obj_slot, key_slot) @@ -484,7 +791,13 @@ ${alloc_tail("%r")} ${sr("a", "%obj_slot")} ${sr("b", "%key_slot")} %r =l call $cell_rt_load_dynamic(l %ctx, l %a, l %b) -${alloc_tail("%r")} + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + ret %fp +@exc + ret 0 }` // load_index(ctx, fp, dest, arr_slot, idx_slot) @@ -493,16 +806,26 @@ ${alloc_tail("%r")} ${sr("a", "%arr_slot")} ${sr("b", "%idx_slot")} %r =l call $cell_rt_load_index(l %ctx, l %a, l %b) -${alloc_tail("%r")} + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + ret %fp +@exc + ret 0 }` - // store_field(ctx, fp, obj_slot, val_slot, name_ptr) — no dest write - h[] = `export function l $__store_field_ss(l %ctx, l %fp, l %obj_slot, l %val_slot, l %name) { + // store_field(ctx, fp, obj_slot, val_slot, lit_idx) — no dest write + h[] = `export function l $__store_field_ss(l %ctx, l %fp, l %obj_slot, l %val_slot, l %lit_idx) { @entry ${sr("a", "%obj_slot")} ${sr("b", "%val_slot")} - call $cell_rt_store_field(l %ctx, l %b, l %a, l %name) -${alloc_tail_nw()} + %ok =w call $cell_rt_store_field_lit(l %ctx, l %b, l %a, l %lit_idx) + jnz %ok, @ok, @exc +@ok + ret %fp +@exc + ret 0 }` // store_dynamic(ctx, fp, obj_slot, val_slot, key_slot) — no dest write @@ -511,8 +834,12 @@ ${alloc_tail_nw()} ${sr("a", "%obj_slot")} ${sr("b", "%val_slot")} ${sr("c", "%key_slot")} - call $cell_rt_store_dynamic(l %ctx, l %b, l %a, l %c) -${alloc_tail_nw()} + %ok =w call $cell_rt_store_dynamic(l %ctx, l %b, l %a, l %c) + jnz %ok, @ok, @exc +@ok + ret %fp +@exc + ret 0 }` // store_index(ctx, fp, obj_slot, val_slot, idx_slot) — no dest write @@ -521,8 +848,12 @@ ${alloc_tail_nw()} ${sr("a", "%obj_slot")} ${sr("b", "%val_slot")} ${sr("c", "%idx_slot")} - call $cell_rt_store_index(l %ctx, l %b, l %a, l %c) -${alloc_tail_nw()} + %ok =w call $cell_rt_store_index(l %ctx, l %b, l %a, l %c) + jnz %ok, @ok, @exc +@ok + 
ret %fp +@exc + ret 0 }` // frame(ctx, fp, dest, fn_slot, nargs) @@ -590,13 +921,14 @@ ${alloc_tail("%r")} h[] = `export function l $__new_float64_ss(l %ctx, l %fp, l %dest, d %val) { @entry %r =l call $qbe_new_float64(l %ctx, d %val) -${alloc_tail("%r")} +${sw("w", "%fp", "%dest", "%r")} + ret %fp }` - // get_intrinsic(ctx, fp, dest, name_ptr) - h[] = `export function l $__get_intrinsic_ss(l %ctx, l %fp, l %dest, l %name_ptr) { + // get_intrinsic(ctx, fp, dest, lit_idx) + h[] = `export function l $__get_intrinsic_ss(l %ctx, l %fp, l %dest, l %lit_idx) { @entry - %r =l call $cell_rt_get_intrinsic(l %ctx, l %name_ptr) + %r =l call $cell_rt_get_intrinsic_lit(l %ctx, l %lit_idx) ${alloc_tail("%r")} }` @@ -631,11 +963,11 @@ ${sr("a", "%src")} ${alloc_tail("%r")} }` - // delete_field(ctx, fp, dest, obj_slot, name_ptr) - h[] = `export function l $__delete_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %name) { + // delete_field(ctx, fp, dest, obj_slot, lit_idx) + h[] = `export function l $__delete_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %lit_idx) { @entry ${sr("a", "%obj_slot")} - %r =l call $cell_rt_delete_str(l %ctx, l %a, l %name) + %r =l call $cell_rt_delete_lit(l %ctx, l %a, l %lit_idx) ${alloc_tail("%r")} }` @@ -671,8 +1003,11 @@ var qbe_emit = function(ir, qbe, export_name) { var out = [] var data_out = [] var str_table = {} + var str_entries = [] var str_id = 0 var uid = 0 + var lit_data = null + var si = 0 // ============================================================ // Output helpers @@ -714,8 +1049,10 @@ var qbe_emit = function(ir, qbe, export_name) { escaped = replace(escaped, "\t", "\\t") var line = "data " + label + ' = ' + '{ b "' + escaped + '", b 0 }' push(data_out, line) - str_table[val] = label - return label + var entry = { label: label, idx: length(str_entries) } + push(str_entries, entry) + str_table[val] = entry + return entry } // ============================================================ @@ -753,6 +1090,7 @@ var qbe_emit = 
function(ir, qbe, export_name) { var ei = 0 var elem_slot = 0 var v = null + var rv = null var lhs = null var rhs = null var obj = null @@ -789,6 +1127,9 @@ var qbe_emit = function(ir, qbe, export_name) { var jnn_lbl = null var jnn_idx = null var jnn_backedge = false + var truthy = null + var lhs_d = null + var rhs_d = null // Pre-scan: count invoke/tail_invoke points to assign segment numbers. // Must skip dead code (instructions after terminators) the same way @@ -907,6 +1248,162 @@ var qbe_emit = function(ir, qbe, export_name) { } } + // Inline JS_ToBool equivalent for hot branch paths. + // Returns a `%name` holding w 0/1 truthiness. + var emit_truthy_w = function(val) { + var tp = fresh() + emit(` %${tp}_t5 =l and ${val}, 31`) + emit(` %${tp}_is_bool =w ceql %${tp}_t5, 3`) + emit(` jnz %${tp}_is_bool, @${tp}_bool, @${tp}_chk_null`) + emit(`@${tp}_bool`) + emit(` %${tp}_truthy =w cnel ${val}, 3`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_chk_null`) + emit(` %${tp}_is_null =w ceql %${tp}_t5, 7`) + emit(` jnz %${tp}_is_null, @${tp}_falsey, @${tp}_chk_int`) + emit(`@${tp}_chk_int`) + emit(` %${tp}_t1 =l and ${val}, 1`) + emit(` %${tp}_is_int =w ceql %${tp}_t1, 0`) + emit(` jnz %${tp}_is_int, @${tp}_int_path, @${tp}_chk_imm_text`) + emit(`@${tp}_int_path`) + emit(` %${tp}_truthy =w cnel ${val}, 0`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_chk_imm_text`) + emit(` %${tp}_is_imm_text =w ceql %${tp}_t5, 11`) + emit(` jnz %${tp}_is_imm_text, @${tp}_imm_text, @${tp}_chk_ptr`) + emit(`@${tp}_imm_text`) + emit(` %${tp}_truthy =w cnel ${val}, 11`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_chk_ptr`) + emit(` %${tp}_ptag =l and ${val}, 7`) + emit(` %${tp}_is_ptr =w ceql %${tp}_ptag, 1`) + emit(` jnz %${tp}_is_ptr, @${tp}_ptr_path, @${tp}_chk_sfloat`) + emit(`@${tp}_chk_sfloat`) + emit(` %${tp}_is_sfloat =w ceql %${tp}_ptag, 5`) + emit(` jnz %${tp}_is_sfloat, @${tp}_sfloat_path, @${tp}_other_imm`) + emit(`@${tp}_sfloat_path`) + emit(` %${tp}_sexp =l shr ${val}, 55`) + emit(` 
%${tp}_sexp =l and %${tp}_sexp, 255`) + emit(` %${tp}_truthy =w cnel %${tp}_sexp, 0`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_other_imm`) + emit(` %${tp}_truthy =w copy 1`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_ptr_path`) + emit(` %${tp}_ptr =l and ${val}, -8`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(`@${tp}_chase`) + emit(` %${tp}_ht =l and %${tp}_hdr, 7`) + emit(` %${tp}_is_fwd =w ceql %${tp}_ht, 7`) + emit(` jnz %${tp}_is_fwd, @${tp}_follow, @${tp}_chk_text_ptr`) + emit(`@${tp}_follow`) + emit(` %${tp}_ptr =l shr %${tp}_hdr, 3`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(` jmp @${tp}_chase`) + emit(`@${tp}_chk_text_ptr`) + emit(` %${tp}_is_text_ptr =w ceql %${tp}_ht, 2`) + emit(` jnz %${tp}_is_text_ptr, @${tp}_text_ptr, @${tp}_ptr_truthy`) + emit(`@${tp}_text_ptr`) + emit(` %${tp}_len =l shr %${tp}_hdr, 8`) + emit(` %${tp}_truthy =w cnel %${tp}_len, 0`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_ptr_truthy`) + emit(` %${tp}_truthy =w copy 1`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_falsey`) + emit(` %${tp}_truthy =w copy 0`) + emit(`@${tp}_done`) + return `%${tp}_truthy` + } + + // Returns w 0/1 for JS text (immediate or heap), following forwards. 
+ var emit_is_text_w = function(val) { + var tp = fresh() + emit(` %${tp}_imm =l and ${val}, 31`) + emit(` %${tp}_is_imm =w ceql %${tp}_imm, 11`) + emit(` jnz %${tp}_is_imm, @${tp}_yes, @${tp}_chk_ptr`) + emit(`@${tp}_chk_ptr`) + emit(` %${tp}_ptag =l and ${val}, 7`) + emit(` %${tp}_is_ptr =w ceql %${tp}_ptag, 1`) + emit(` jnz %${tp}_is_ptr, @${tp}_ptr, @${tp}_no`) + emit(`@${tp}_ptr`) + emit(` %${tp}_ptr =l and ${val}, -8`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(`@${tp}_chase`) + emit(` %${tp}_ht =l and %${tp}_hdr, 7`) + emit(` %${tp}_is_fwd =w ceql %${tp}_ht, 7`) + emit(` jnz %${tp}_is_fwd, @${tp}_follow, @${tp}_chk`) + emit(`@${tp}_follow`) + emit(` %${tp}_ptr =l shr %${tp}_hdr, 3`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(` jmp @${tp}_chase`) + emit(`@${tp}_chk`) + emit(` %${tp}_is_text =w ceql %${tp}_ht, 2`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_yes`) + emit(` %${tp}_is_text =w copy 1`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_no`) + emit(` %${tp}_is_text =w copy 0`) + emit(`@${tp}_done`) + return `%${tp}_is_text` + } + + // Returns w 0/1 for JS numbers (int or short-float). + var emit_is_num_w = function(val) { + var np = fresh() + emit(` %${np}_t1 =l and ${val}, 1`) + emit(` %${np}_ii =w ceql %${np}_t1, 0`) + emit(` %${np}_t2 =l and ${val}, 7`) + emit(` %${np}_fi =w ceql %${np}_t2, 5`) + emit(` %${np}_is_num =w or %${np}_ii, %${np}_fi`) + return `%${np}_is_num` + } + + // Pack w 0/1 into tagged JS bool (JS_FALSE/JS_TRUE). + var emit_pack_bool_js = function(wv) { + var bp = fresh() + emit(` %${bp}_ext =l extuw ${wv}`) + emit(` %${bp}_sh =l shl %${bp}_ext, 5`) + emit(` %${bp}_js =l or %${bp}_sh, 3`) + return `%${bp}_js` + } + + // Convert a known numeric JSValue (int or short-float) to QBE double. + // Type checks happen earlier in mcode/streamline. 
+ var emit_num_to_double = function(val) { + var np = fresh() + emit(` %${np}_tag =l and ${val}, 1`) + emit(` %${np}_is_int =w ceql %${np}_tag, 0`) + emit(` jnz %${np}_is_int, @${np}_int, @${np}_float`) + emit(`@${np}_int`) + emit(` %${np}_isl =l sar ${val}, 1`) + emit(` %${np}_iw =w copy %${np}_isl`) + emit(` %${np}_d =d swtof %${np}_iw`) + emit(` jmp @${np}_done`) + emit(`@${np}_float`) + emit(` %${np}_sexp =l shr ${val}, 55`) + emit(` %${np}_sexp =l and %${np}_sexp, 255`) + emit(` %${np}_is_zero =w ceql %${np}_sexp, 0`) + emit(` jnz %${np}_is_zero, @${np}_fzero, @${np}_fdecode`) + emit(`@${np}_fzero`) + emit(` %${np}_d =d copy d_0.0`) + emit(` jmp @${np}_done`) + emit(`@${np}_fdecode`) + emit(` %${np}_sign =l shr ${val}, 63`) + emit(` %${np}_mant =l shr ${val}, 3`) + emit(` %${np}_mant =l and %${np}_mant, 4503599627370495`) + emit(` %${np}_dexp =l sub %${np}_sexp, 127`) + emit(` %${np}_dexp =l add %${np}_dexp, 1023`) + emit(` %${np}_s63 =l shl %${np}_sign, 63`) + emit(` %${np}_e52 =l shl %${np}_dexp, 52`) + emit(` %${np}_bits =l or %${np}_s63, %${np}_e52`) + emit(` %${np}_bits =l or %${np}_bits, %${np}_mant`) + emit(` %${np}_d =d cast %${np}_bits`) + emit(`@${np}_done`) + return `%${np}_d` + } + // Walk instructions var last_was_term = false i = 0 @@ -977,12 +1474,12 @@ var qbe_emit = function(ir, qbe, export_name) { } } else if (is_text(a2)) { sl = intern_str(a2) - emit(` %fp =l call $__new_string_ss(l %ctx, l %fp, l ${text(a1)}, l ${sl})`) + emit(` %fp =l call $__access_lit_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(sl.idx)})`) emit_exc_check() } else if (is_object(a2)) { if (a2.make == "intrinsic") { sl = intern_str(a2.name) - emit(` %fp =l call $__get_intrinsic_ss(l %ctx, l %fp, l ${text(a1)}, l ${sl})`) + emit(` %fp =l call $__get_intrinsic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(sl.idx)})`) emit_exc_check() } else if (a2.kind == "number") { if (a2.number != null && is_integer(a2.number)) { @@ -995,7 +1492,7 @@ var qbe_emit = function(ir, qbe, export_name) 
{ } } else if (a2.kind == "text") { sl = intern_str(a2.value) - emit(` %fp =l call $__new_string_ss(l %ctx, l %fp, l ${text(a1)}, l ${sl})`) + emit(` %fp =l call $__access_lit_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(sl.idx)})`) emit_exc_check() } else if (a2.kind == "true") { s_write(a1, text(qbe.js_true)) @@ -1015,7 +1512,8 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Movement --- if (op == "move") { - emit(` call $__move_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + s_write(a1, v) continue } @@ -1044,18 +1542,34 @@ var qbe_emit = function(ir, qbe, export_name) { s_write(a1, `%${p}_tag`) emit(` jmp @${p}_done`) emit(`@${p}_slow`) - emit(` %${p}_r =l call $cell_rt_add(l %ctx, l ${lhs}, l ${rhs})`) - emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) - chk = fresh() - emit(` %${chk} =w ceql %fp, 0`) - if (has_handler && !in_handler) { - emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) - } else { - needs_exc_ret = true - emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) - } - emit(`@${chk}_ok`) + emit(` # mixed add: numeric add, text concat, else disrupt`) + emit(` %${p}_a_num =w copy ${emit_is_num_w(lhs)}`) + emit(` %${p}_b_num =w copy ${emit_is_num_w(rhs)}`) + emit(` %${p}_both_num =w and %${p}_a_num, %${p}_b_num`) + emit(` jnz %${p}_both_num, @${p}_num_add, @${p}_chk_text`) + emit(`@${p}_num_add`) + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d add ${lhs_d}, ${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) s_write(a1, `%${p}_r`) + emit(` jmp @${p}_done`) + emit(`@${p}_chk_text`) + emit(` %${p}_a_txt =w copy ${emit_is_text_w(lhs)}`) + emit(` %${p}_b_txt =w copy ${emit_is_text_w(rhs)}`) + emit(` %${p}_both_txt =w and %${p}_a_txt, %${p}_b_txt`) + emit(` jnz %${p}_both_txt, @${p}_txt_add, @${p}_bad`) + emit(`@${p}_txt_add`) + emit(` %fp =l call $__concat_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + emit_exc_check() + emit(` jmp @${p}_done`) + 
emit(`@${p}_bad`) + emit(` call $cell_rt_disrupt(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jmp @disruption_handler`) + } else { + emit(` ret 15`) + } emit(`@${p}_done`) continue } @@ -1082,17 +1596,10 @@ var qbe_emit = function(ir, qbe, export_name) { s_write(a1, `%${p}_tag`) emit(` jmp @${p}_done`) emit(`@${p}_slow`) - emit(` %${p}_r =l call $qbe_float_sub(l %ctx, l ${lhs}, l ${rhs})`) - emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) - chk = fresh() - emit(` %${chk} =w ceql %fp, 0`) - if (has_handler && !in_handler) { - emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) - } else { - needs_exc_ret = true - emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) - } - emit(`@${chk}_ok`) + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d sub ${lhs_d}, ${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) s_write(a1, `%${p}_r`) emit(`@${p}_done`) continue @@ -1120,40 +1627,55 @@ var qbe_emit = function(ir, qbe, export_name) { s_write(a1, `%${p}_tag`) emit(` jmp @${p}_done`) emit(`@${p}_slow`) - emit(` %${p}_r =l call $qbe_float_mul(l %ctx, l ${lhs}, l ${rhs})`) - emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) - chk = fresh() - emit(` %${chk} =w ceql %fp, 0`) - if (has_handler && !in_handler) { - emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) - } else { - needs_exc_ret = true - emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) - } - emit(`@${chk}_ok`) + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d mul ${lhs_d}, ${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) s_write(a1, `%${p}_r`) emit(`@${p}_done`) continue } if (op == "divide") { - emit(` %fp =l call $__div_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d div ${lhs_d}, ${rhs_d}`) + emit(` 
%${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } if (op == "modulo") { - emit(` %fp =l call $__mod_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d call $fmod(d ${lhs_d}, d ${rhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } if (op == "negate") { - emit(` %fp =l call $__neg_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) - emit_exc_check() + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_rd =d neg ${lhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } if (op == "pow") { - emit(` %fp =l call $__pow_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d call $pow(d ${lhs_d}, d ${rhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } @@ -1168,27 +1690,45 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Type checks — use qbe.cm macros (no GC, no refresh) --- if (op == "is_int") { - emit(` call $__is_int_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + p = fresh() + emit(` %${p}_tag =l and ${v}, 1`) + emit(` %${p}_w =w ceql %${p}_tag, 0`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_text") { - emit(` call $__is_text_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + s_write(a1, emit_pack_bool_js(emit_is_text_w(v))) continue } if (op == "is_num") { - emit(` call $__is_num_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + s_write(a1, emit_pack_bool_js(emit_is_num_w(v))) continue } if (op == "is_bool") { - emit(` call 
$__is_bool_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + p = fresh() + emit(` %${p}_t5 =l and ${v}, 31`) + emit(` %${p}_w =w ceql %${p}_t5, 3`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_null") { - emit(` call $__is_null_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + p = fresh() + emit(` %${p}_t5 =l and ${v}, 31`) + emit(` %${p}_w =w ceql %${p}_t5, 7`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_identical") { - emit(` call $__is_identical_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_w =w ceql ${lhs}, ${rhs}`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_array") { @@ -1418,7 +1958,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__load_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1443,7 +1983,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__load_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1464,7 +2004,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__store_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l 
${text(a3)})`) } @@ -1491,7 +2031,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__store_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1534,17 +2074,8 @@ var qbe_emit = function(ir, qbe, export_name) { jt_lbl = sanitize(a2) jt_idx = label_pos[jt_lbl] jt_backedge = jt_idx != null && jt_idx < instr_idx - emit(` %${p}_is_true =w ceql ${v}, ${text(qbe.js_true)}`) - emit(` jnz %${p}_is_true, @${p}_take, @${p}_chk_fast`) - emit(`@${p}_chk_fast`) - emit(` %${p}_tag =l and ${v}, 31`) - emit(` %${p}_is_bool =w ceql %${p}_tag, 3`) - emit(` %${p}_is_null =w ceql %${p}_tag, 7`) - emit(` %${p}_is_falsey =w or %${p}_is_bool, %${p}_is_null`) - emit(` jnz %${p}_is_falsey, @${p}_f, @${p}_tb`) - emit(`@${p}_tb`) - emit(` %${p}_tbv =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}_tbv, @${p}_take, @${p}_f`) + truthy = emit_truthy_w(v) + emit(` jnz ${truthy}, @${p}_take, @${p}_f`) emit(`@${p}_take`) if (jt_backedge) { emit_backedge_branch(jt_lbl) @@ -1560,17 +2091,8 @@ var qbe_emit = function(ir, qbe, export_name) { jf_lbl = sanitize(a2) jf_idx = label_pos[jf_lbl] jf_backedge = jf_idx != null && jf_idx < instr_idx - emit(` %${p}_is_true =w ceql ${v}, ${text(qbe.js_true)}`) - emit(` jnz %${p}_is_true, @${p}_t, @${p}_chk_fast`) - emit(`@${p}_chk_fast`) - emit(` %${p}_tag =l and ${v}, 31`) - emit(` %${p}_is_bool =w ceql %${p}_tag, 3`) - emit(` %${p}_is_null =w ceql %${p}_tag, 7`) - emit(` %${p}_is_fast_false =w or %${p}_is_bool, %${p}_is_null`) - emit(` jnz %${p}_is_fast_false, @${p}_take, @${p}_tb`) - emit(`@${p}_tb`) - emit(` %${p}_tbv =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}_tbv, @${p}_t, @${p}_take`) + truthy = emit_truthy_w(v) + emit(` jnz ${truthy}, @${p}_t, 
@${p}_take`) emit(`@${p}_take`) if (jf_backedge) { emit_backedge_branch(jf_lbl) @@ -1625,7 +2147,12 @@ var qbe_emit = function(ir, qbe, export_name) { if (op == "setarg") { v = s_read(a1) lhs = s_read(a3) - emit(` call $cell_rt_setarg(l ${v}, l ${text(a2)}, l ${lhs})`) + p = fresh() + // JSFrame layout: [header,function,caller,address,slots...] + // slots start at byte offset 32. + emit(` %${p}_fr =l and ${v}, -8`) + emit(` %${p}_slot =l add %${p}_fr, ${text(32 + a2 * 8)}`) + emit(` storel ${lhs}, %${p}_slot`) continue } if (op == "invoke") { @@ -1640,9 +2167,11 @@ var qbe_emit = function(ir, qbe, export_name) { emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) emit(" ret 0") emit(`@_seg${text(seg_num)}`) - // Check for exception after dispatch loop resumes us + // Check for exception marker in destination slot after resume. + // Dispatch writes JS_EXCEPTION into ret_slot on exceptional return. + rv = s_read(a2) p = fresh() - emit(` %${p} =w call $JS_HasException(l %ctx)`) + emit(` %${p} =w ceql ${rv}, ${text(qbe.js_exception)}`) if (has_handler && !in_handler) { emit(` jnz %${p}, @disruption_handler, @${p}_ok`) } else { @@ -1760,7 +2289,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__delete_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__delete_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__delete_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1783,7 +2312,7 @@ var qbe_emit = function(ir, qbe, export_name) { // IR: ["regexp", dest_slot, pattern_string, flags_string] pat_label = intern_str(a2) flg_label = intern_str(a3) - emit(` %fp =l call $__regexp_ss(l %ctx, l %fp, l ${text(a1)}, l ${pat_label}, l ${flg_label})`) + emit(` %fp =l call $__regexp_ss(l %ctx, l %fp, l ${text(a1)}, l ${pat_label.label}, l ${flg_label.label})`) emit_exc_check() continue } 
@@ -1830,6 +2359,16 @@ var qbe_emit = function(ir, qbe, export_name) { // Export nr_slots for main function so the module loader can use right-sized frames var main_name = export_name ? sanitize(export_name) : "cell_main" push(data_out, `export data $${main_name}_nr_slots = { w ${text(ir.main.nr_slots)} }`) + push(data_out, `export data $cell_lit_count = { w ${text(length(str_entries))} }`) + if (length(str_entries) > 0) { + lit_data = [] + si = 0 + while (si < length(str_entries)) { + push(lit_data, `l ${str_entries[si].label}`) + si = si + 1 + } + push(data_out, `export data $cell_lit_table = { ${text(lit_data, ", ")} }`) + } return { data: text(data_out, "\n"), diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 31154a24..8d638a61 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -230,23 +230,188 @@ JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b) { /* --- Property access --- */ -JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) { +/* Current module handle for active native dispatch. 
*/ +static CELL_THREAD_LOCAL void *g_current_dl_handle = NULL; + +typedef struct { + void *dl_handle; + JSContext *ctx; + JSGCRef *vals; + int count; +} AOTLiteralPool; + +static CELL_THREAD_LOCAL AOTLiteralPool g_aot_lit_pool = {0}; + +static void aot_clear_lit_pool(void) { + if (g_aot_lit_pool.vals) { + if (g_aot_lit_pool.ctx) { + for (int i = 0; i < g_aot_lit_pool.count; i++) + JS_DeleteGCRef(g_aot_lit_pool.ctx, &g_aot_lit_pool.vals[i]); + } + free(g_aot_lit_pool.vals); + } + g_aot_lit_pool.dl_handle = NULL; + g_aot_lit_pool.ctx = NULL; + g_aot_lit_pool.vals = NULL; + g_aot_lit_pool.count = 0; +} + +static int aot_load_lit_pool(JSContext *ctx, void *dl_handle) { + aot_clear_lit_pool(); + g_aot_lit_pool.dl_handle = dl_handle; + g_aot_lit_pool.ctx = ctx; + if (!dl_handle) + return 1; + + int *count_ptr = (int *)dlsym(dl_handle, "cell_lit_count"); + const char **table_ptr = (const char **)dlsym(dl_handle, "cell_lit_table"); + int count = count_ptr ? *count_ptr : 0; + if (count <= 0 || !table_ptr) + return 1; + + g_aot_lit_pool.vals = (JSGCRef *)calloc((size_t)count, sizeof(JSGCRef)); + if (!g_aot_lit_pool.vals) { + JS_RaiseOOM(ctx); + return 0; + } + g_aot_lit_pool.count = 0; + + for (int i = 0; i < count; i++) { + const char *cstr = table_ptr[i] ? 
table_ptr[i] : ""; + JS_AddGCRef(ctx, &g_aot_lit_pool.vals[i]); + g_aot_lit_pool.count = i + 1; + g_aot_lit_pool.vals[i].val = js_key_new(ctx, cstr); + if (JS_IsException(g_aot_lit_pool.vals[i].val)) { + aot_clear_lit_pool(); + return 0; + } + } + return 1; +} + +static JSValue aot_lit_from_index(JSContext *ctx, int64_t lit_idx) { + if (lit_idx < 0) { + JS_RaiseDisrupt(ctx, "literal index out of range"); + return JS_EXCEPTION; + } + + if (g_aot_lit_pool.dl_handle != g_current_dl_handle || g_aot_lit_pool.ctx != ctx) { + if (!aot_load_lit_pool(ctx, g_current_dl_handle)) + return JS_EXCEPTION; + } + + if (lit_idx >= g_aot_lit_pool.count) { + JS_RaiseDisrupt(ctx, "literal index out of range"); + return JS_EXCEPTION; + } + return g_aot_lit_pool.vals[lit_idx].val; +} + +typedef struct { + const char *name; + JSValue key; +} AOTKeyCacheEntry; + +static CELL_THREAD_LOCAL JSContext *g_aot_key_cache_ctx = NULL; +static CELL_THREAD_LOCAL AOTKeyCacheEntry *g_aot_key_cache = NULL; +static CELL_THREAD_LOCAL int g_aot_key_cache_count = 0; +static CELL_THREAD_LOCAL int g_aot_key_cache_cap = 0; + +/* Convert a static C string to an interned JSValue key. + Uses a small thread-local cache keyed by C-string pointer to avoid + repeated UTF-8 decoding in hot property paths. */ +static JSValue aot_key_from_cstr(JSContext *ctx, const char *name) { + if (!name) + return JS_NULL; + + if (g_aot_key_cache_ctx != ctx) { + free(g_aot_key_cache); + g_aot_key_cache = NULL; + g_aot_key_cache_count = 0; + g_aot_key_cache_cap = 0; + g_aot_key_cache_ctx = ctx; + } + + for (int i = 0; i < g_aot_key_cache_count; i++) { + if (g_aot_key_cache[i].name == name) + return g_aot_key_cache[i].key; + } + + JSValue key = js_key_new(ctx, name); + if (JS_IsNull(key)) + return JS_RaiseDisrupt(ctx, "invalid property key"); + + if (g_aot_key_cache_count >= g_aot_key_cache_cap) { + int new_cap = g_aot_key_cache_cap ? 
(g_aot_key_cache_cap * 2) : 64; + AOTKeyCacheEntry *new_cache = + (AOTKeyCacheEntry *)realloc(g_aot_key_cache, (size_t)new_cap * sizeof(*new_cache)); + if (!new_cache) + return JS_RaiseOOM(ctx); + g_aot_key_cache = new_cache; + g_aot_key_cache_cap = new_cap; + } + + g_aot_key_cache[g_aot_key_cache_count].name = name; + g_aot_key_cache[g_aot_key_cache_count].key = key; + g_aot_key_cache_count++; + return key; +} + +static JSValue cell_rt_load_field_key(JSContext *ctx, JSValue obj, JSValue key) { if (JS_IsFunction(obj)) { JS_RaiseDisrupt(ctx, "cannot read property of function"); return JS_EXCEPTION; } - return JS_GetPropertyStr(ctx, obj, name); + return JS_GetProperty(ctx, obj, key); +} + +JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_load_field_key(ctx, obj, key); +} + +JSValue cell_rt_load_field_lit(JSContext *ctx, JSValue obj, int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_load_field_key(ctx, obj, key); } /* Like cell_rt_load_field but without the function guard. Used by load_dynamic when the key happens to be a static string. */ JSValue cell_rt_load_prop_str(JSContext *ctx, JSValue obj, const char *name) { - return JS_GetPropertyStr(ctx, obj, name); + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return JS_GetProperty(ctx, obj, key); } -void cell_rt_store_field(JSContext *ctx, JSValue val, JSValue obj, - const char *name) { - JS_SetPropertyStr(ctx, obj, name, val); +static int cell_rt_store_field_key(JSContext *ctx, JSValue val, JSValue obj, + JSValue key) { + int ret = JS_SetProperty(ctx, obj, key, val); + return (ret < 0 || JS_HasException(ctx)) ? 
0 : 1; +} + +int cell_rt_store_field(JSContext *ctx, JSValue val, JSValue obj, + const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return 0; + return cell_rt_store_field_key(ctx, val, obj, key); +} + +int cell_rt_store_field_lit(JSContext *ctx, JSValue val, JSValue obj, + int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return 0; + return cell_rt_store_field_key(ctx, val, obj, key); +} + +JSValue cell_rt_access_lit(JSContext *ctx, int64_t lit_idx) { + return aot_lit_from_index(ctx, lit_idx); } JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) { @@ -255,16 +420,22 @@ JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) { return JS_GetProperty(ctx, obj, key); } -void cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj, - JSValue key) { +int cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj, + JSValue key) { + int ret = 0; + JSValue nr = JS_NULL; if (JS_IsInt(key)) { - JS_SetPropertyNumber(ctx, obj, (uint32_t)JS_VALUE_GET_INT(key), val); + nr = JS_SetPropertyNumber(ctx, obj, (uint32_t)JS_VALUE_GET_INT(key), val); + return JS_IsException(nr) ? 0 : 1; } else if (JS_IsArray(obj) && !JS_IsInt(key)) { JS_RaiseDisrupt(ctx, "array index must be a number"); + return 0; } else if (JS_IsBool(key) || JS_IsNull(key) || JS_IsArray(key) || JS_IsFunction(key)) { JS_RaiseDisrupt(ctx, "object key must be text"); + return 0; } else { - JS_SetProperty(ctx, obj, key, val); + ret = JS_SetProperty(ctx, obj, key, val); + return (ret < 0 || JS_HasException(ctx)) ? 
0 : 1; } } @@ -274,12 +445,17 @@ JSValue cell_rt_load_index(JSContext *ctx, JSValue arr, JSValue idx) { return JS_GetProperty(ctx, arr, idx); } -void cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, - JSValue idx) { +int cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, + JSValue idx) { + int ret = 0; + JSValue nr = JS_NULL; if (JS_IsInt(idx)) - JS_SetPropertyNumber(ctx, arr, (uint32_t)JS_VALUE_GET_INT(idx), val); + nr = JS_SetPropertyNumber(ctx, arr, (uint32_t)JS_VALUE_GET_INT(idx), val); else - JS_SetProperty(ctx, arr, idx, val); + ret = JS_SetProperty(ctx, arr, idx, val); + if (JS_IsInt(idx)) + return JS_IsException(nr) ? 0 : 1; + return (ret < 0 || JS_HasException(ctx)) ? 0 : 1; } /* --- Intrinsic/global lookup --- */ @@ -294,6 +470,17 @@ void cell_rt_set_native_env(JSContext *ctx, JSValue env) { fprintf(stderr, "cell_rt_set_native_env: ERROR env not stone\n"); abort(); } + /* Drop module literal pool roots before switching native env/module. */ + aot_clear_lit_pool(); + + /* Native module boundary: clear per-thread key cache so stale keys + cannot survive across context/module lifetimes. 
*/ + free(g_aot_key_cache); + g_aot_key_cache = NULL; + g_aot_key_cache_count = 0; + g_aot_key_cache_cap = 0; + g_aot_key_cache_ctx = ctx; + if (g_has_native_env) JS_DeleteGCRef(ctx, &g_native_env_ref); if (!JS_IsNull(env)) { @@ -305,10 +492,10 @@ void cell_rt_set_native_env(JSContext *ctx, JSValue env) { } } -JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { +static JSValue cell_rt_get_intrinsic_key(JSContext *ctx, JSValue key) { /* Check native env first (runtime-provided functions like log) */ if (g_has_native_env) { - JSValue v = JS_GetPropertyStr(ctx, g_native_env_ref.val, name); + JSValue v = JS_GetProperty(ctx, g_native_env_ref.val, key); if (!JS_IsNull(v)) return v; } @@ -319,14 +506,28 @@ JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { JSRecord *rec = (JSRecord *)chase(gobj); uint64_t mask = objhdr_cap56(rec->mist_hdr); for (uint64_t i = 1; i <= mask; i++) { - if (js_key_equal_str(rec->slots[i].key, name)) + if (js_key_equal(rec->slots[i].key, key)) return rec->slots[i].val; } } - JS_RaiseDisrupt(ctx, "'%s' is not defined", name); + JS_RaiseDisrupt(ctx, "name is not defined"); return JS_EXCEPTION; } +JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_get_intrinsic_key(ctx, key); +} + +JSValue cell_rt_get_intrinsic_lit(JSContext *ctx, int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_get_intrinsic_key(ctx, key); +} + /* --- Closure access --- Walk the outer_frame chain on JSFunction (JS_FUNC_KIND_NATIVE). The frame's function field links to the JSFunction, whose @@ -436,6 +637,17 @@ JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { return (JSValue *)frame->slots; } +/* Push an already-allocated frame onto the active AOT frame stack. 
*/ +static int cell_rt_push_existing_frame(JSContext *ctx, JSValue frame_val) { + if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) + return 0; + JSGCRef *ref = aot_gc_ref_at(g_aot_depth); + JS_AddGCRef(ctx, ref); + ref->val = frame_val; + g_aot_depth++; + return 1; +} + JSValue *cell_rt_refresh_fp(JSContext *ctx) { (void)ctx; if (g_aot_depth <= 0) { @@ -484,7 +696,7 @@ typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); /* Set before executing a native module's cell_main — used by cell_rt_make_function to resolve fn_ptr via dlsym */ -static CELL_THREAD_LOCAL void *g_current_dl_handle = NULL; +/* g_current_dl_handle is defined near property/literal helpers. */ /* ============================================================ Dispatch loop — the core of native function execution. @@ -604,8 +816,9 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JS_PushGCRef(ctx, &callee_ref); callee_ref.val = callee_frame_val; JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); - int callee_argc = (int)objhdr_cap56(callee_fr->header); - callee_argc = (callee_argc >= 2) ? callee_argc - 2 : 0; + int callee_argc = JS_VALUE_GET_INT(callee_fr->address); + if (callee_argc < 0) + callee_argc = 0; JSValue callee_fn_val = callee_fr->function; if (!JS_IsFunction(callee_fn_val)) { @@ -625,47 +838,44 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (callee_fn->kind == JS_FUNC_KIND_NATIVE) { /* Native-to-native call — no C stack growth */ cell_compiled_fn callee_ptr = (cell_compiled_fn)callee_fn->u.native.fn_ptr; - int callee_slots = callee_fn->u.native.nr_slots; if (pending_is_tail) { - /* Tail call: replace frame instead of mutating in place. - In-place reuse breaks closures that captured the caller frame. */ + /* Tail call: replace current frame with the prepared callee frame. */ JSValue saved_caller = frame->caller; - int cc = (callee_argc < callee_fn->length) ? 
callee_argc : callee_fn->length; - if (cc < 0) cc = callee_argc; /* Pop old frame */ cell_rt_leave_frame(ctx); - /* Push new right-sized frame */ - JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); - if (!new_fp) { + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + callee_fn_val = callee_fn_ref.val; + callee_fr->function = callee_fn_val; + callee_fr->caller = saved_caller; + callee_fr->address = JS_NewInt32(ctx, 0); + + if (!cell_rt_push_existing_frame(ctx, callee_ref.val)) { JS_PopGCRef(ctx, &callee_fn_ref); JS_PopGCRef(ctx, &callee_ref); RETURN_DISPATCH(JS_EXCEPTION); } - callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); - JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); - callee_fn_val = callee_fn_ref.val; - new_frame->function = callee_fn_val; - new_frame->caller = saved_caller; - new_frame->slots[0] = callee_fr->slots[0]; - for (int i = 0; i < cc && i < callee_slots - 1; i++) - new_frame->slots[1 + i] = callee_fr->slots[1 + i]; - frame = new_frame; - fp = new_fp; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(g_aot_depth - 1)->val); + fp = (JSValue *)frame->slots; fn = callee_ptr; } else { - /* Regular call: push new frame, link caller */ + /* Regular call: link caller and push prepared callee frame. */ int ret_info = JS_VALUE_GET_INT(frame->address); int resume_seg = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; - int cc = (callee_argc < callee_fn->length) ? 
callee_argc : callee_fn->length; - if (cc < 0) cc = callee_argc; + /* Save return address in caller */ + frame->address = JS_NewInt32(ctx, (resume_seg << 16) | ret_slot); - JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); - if (!new_fp) { + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + callee_fn_val = callee_fn_ref.val; + callee_fr->function = callee_fn_val; + callee_fr->caller = JS_MKPTR(frame); + callee_fr->address = JS_NewInt32(ctx, 0); + + if (!cell_rt_push_existing_frame(ctx, callee_ref.val)) { /* Resume caller with exception pending */ frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); @@ -676,29 +886,8 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JS_PopGCRef(ctx, &callee_ref); continue; } - callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); - - /* Re-derive caller frame after alloc */ - if (g_aot_depth <= 1) { - fprintf(stderr, "[BUG] native dispatch bad depth while linking caller: %d\n", g_aot_depth); - abort(); - } - frame_val = aot_gc_ref_at(g_aot_depth - 2)->val; - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); - - JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); - callee_fn_val = callee_fn_ref.val; - new_frame->function = callee_fn_val; - new_frame->caller = JS_MKPTR(frame); - new_frame->slots[0] = callee_fr->slots[0]; - for (int i = 0; i < cc && i < callee_slots - 1; i++) - new_frame->slots[1 + i] = callee_fr->slots[1 + i]; - - /* Save return address in caller */ - frame->address = JS_NewInt32(ctx, (resume_seg << 16) | ret_slot); - - frame = new_frame; - fp = new_fp; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(g_aot_depth - 1)->val); + fp = (JSValue *)frame->slots; fn = callee_ptr; } } else { @@ -719,11 +908,16 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (JS_IsException(ret)) { /* Non-native callee threw — resume caller with exception 
pending. - The caller's generated code checks JS_HasException at resume. */ + Tag the pending return slot with JS_EXCEPTION so generated code + can branch without an extra JS_HasException C call. */ if (!JS_HasException(ctx)) JS_Disrupt(ctx); + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = JS_EXCEPTION; /* fn and fp still point to the calling native function's frame. - Just resume it — it will detect the exception. */ + Just resume it — it will detect JS_EXCEPTION in the return slot. */ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; JS_PopGCRef(ctx, &callee_ref); @@ -789,10 +983,14 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, RETURN_DISPATCH(JS_EXCEPTION); } - /* Resume caller — it will check JS_HasException and branch to handler */ + /* Resume caller and tag the return slot with JS_EXCEPTION. */ frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = JS_EXCEPTION; JSFunction *exc_caller_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_caller_fn->u.native.fn_ptr; @@ -859,9 +1057,13 @@ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) { return JS_EXCEPTION; } int nr_slots = (int)nargs + 2; + JSFunction *f = JS_VALUE_GET_FUNCTION(fn); + if (f->kind == JS_FUNC_KIND_NATIVE && f->u.native.nr_slots > nr_slots) + nr_slots = f->u.native.nr_slots; JSFrameRegister *new_frame = alloc_frame_register(ctx, nr_slots); if (!new_frame) return JS_EXCEPTION; new_frame->function = fn; + new_frame->address = JS_NewInt32(ctx, (int)nargs); return JS_MKPTR(new_frame); } @@ -875,8 +1077,8 @@ void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) { JSValue 
cell_rt_invoke(JSContext *ctx, JSValue frame_val) { if (frame_val == JS_EXCEPTION) return JS_EXCEPTION; JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); - int nr_slots = (int)objhdr_cap56(fr->header); - int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0; + int c_argc = JS_VALUE_GET_INT(fr->address); + if (c_argc < 0) c_argc = 0; JSValue fn_val = fr->function; if (!JS_IsFunction(fn_val)) { @@ -933,14 +1135,27 @@ JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key) { return JS_NewBool(ctx, ret >= 0); } -JSValue cell_rt_delete_str(JSContext *ctx, JSValue obj, const char *name) { - JSValue key = JS_NewString(ctx, name); +static JSValue cell_rt_delete_key(JSContext *ctx, JSValue obj, JSValue key) { int ret = JS_DeleteProperty(ctx, obj, key); if (ret < 0) return JS_EXCEPTION; return JS_NewBool(ctx, ret >= 0); } +JSValue cell_rt_delete_str(JSContext *ctx, JSValue obj, const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_delete_key(ctx, obj, key); +} + +JSValue cell_rt_delete_lit(JSContext *ctx, JSValue obj, int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_delete_key(ctx, obj, key); +} + /* --- Typeof --- */ JSValue cell_rt_typeof(JSContext *ctx, JSValue val) { From a05d0e25250979dfcb3d50fffe70b0347a9a5835 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 20:56:15 -0600 Subject: [PATCH 5/9] better streamline --- build.cm | 2 +- qbe_emit.cm | 86 ++++++++++++- streamline.cm | 348 +++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 387 insertions(+), 49 deletions(-) diff --git a/build.cm b/build.cm index b181cc8e..b233b527 100644 --- a/build.cm +++ b/build.cm @@ -81,7 +81,7 @@ function content_hash(str) { } // Bump when native codegen/runtime ABI changes so stale dylibs are not reused. 
-def NATIVE_CACHE_VERSION = "native-v16" +def NATIVE_CACHE_VERSION = "native-v22" // Enable AOT ASan by creating .cell/asan_aot in the package root. function native_sanitize_flags() { diff --git a/qbe_emit.cm b/qbe_emit.cm index 43674cd6..a5e2734d 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -1130,6 +1130,15 @@ var qbe_emit = function(ir, qbe, export_name) { var truthy = null var lhs_d = null var rhs_d = null + var peek1 = null + var peek2 = null + var peek3 = null + var peek4 = null + var peek5 = null + var floor_frame_slot = 0 + var floor_this_slot = 0 + var floor_arg_slot = 0 + var floor_dest_slot = 0 // Pre-scan: count invoke/tail_invoke points to assign segment numbers. // Must skip dead code (instructions after terminators) the same way @@ -1141,15 +1150,43 @@ var qbe_emit = function(ir, qbe, export_name) { scan = instrs[si] si = si + 1 if (is_text(scan)) { + // Skip optimizer nop pseudo-labels entirely. + if (starts_with(scan, "_nop_")) continue label_pos[sanitize(scan)] = si - 1 - // Labels reset dead code state (unless they're nop pseudo-labels) - if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_")) - scan_dead = false + // Real labels reset dead code state. + scan_dead = false continue } if (scan_dead) continue if (!is_array(scan)) continue scan_op = scan[0] + + // Keep invoke segment counting consistent with main-loop peephole: + // inline floor intrinsic call sequence does not emit an invoke. 
+ if (false && scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "floor") { + if (si + 4 < length(instrs)) { + peek1 = instrs[si] + peek2 = instrs[si + 1] + peek3 = instrs[si + 2] + peek4 = instrs[si + 3] + peek5 = instrs[si + 4] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + floor_frame_slot = peek1[1] + floor_this_slot = peek2[1] + if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot && + peek4[1] == floor_frame_slot && peek4[2] == 1 && + peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) { + si = si + 5 + continue + } + } + } + } + if (scan_op == "invoke") { invoke_count = invoke_count + 1 } @@ -1427,7 +1464,7 @@ var qbe_emit = function(ir, qbe, export_name) { // Labels are plain strings; skip nop pseudo-labels from streamline if (is_text(instr)) { - if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue + if (starts_with(instr, "_nop_")) continue lbl = sanitize(instr) if (!last_was_term) { emit(` jmp @${lbl}`) @@ -1446,6 +1483,47 @@ var qbe_emit = function(ir, qbe, export_name) { a3 = instr[3] last_was_term = false + // Peephole: inline `floor(x)` intrinsic call sequence + // access floor; frame; null this; setarg 0 this; setarg 1 x; invoke + if (false && op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "floor") { + if (instr_idx + 5 < length(instrs)) { + peek1 = instrs[instr_idx + 1] + peek2 = instrs[instr_idx + 2] + peek3 = instrs[instr_idx + 3] + peek4 = instrs[instr_idx + 4] + peek5 = instrs[instr_idx + 5] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" 
&& + is_array(peek5) && peek5[0] == "invoke") { + floor_frame_slot = peek1[1] + floor_this_slot = peek2[1] + if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot && + peek4[1] == floor_frame_slot && peek4[2] == 1 && + peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) { + floor_arg_slot = peek4[3] + floor_dest_slot = peek5[2] + v = s_read(floor_arg_slot) + p = fresh() + emit(` %${p}_is_num =w copy ${emit_is_num_w(v)}`) + emit(` jnz %${p}_is_num, @${p}_ok, @${p}_bad`) + emit(`@${p}_bad`) + s_write(floor_dest_slot, text(qbe.js_null)) + emit(` jmp @${p}_done`) + emit(`@${p}_ok`) + lhs_d = emit_num_to_double(v) + emit(` %${p}_fd =d call $floor(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_fd)`) + s_write(floor_dest_slot, `%${p}_r`) + emit(`@${p}_done`) + i = instr_idx + 6 + continue + } + } + } + } + // --- Constants --- if (op == "int") { diff --git a/streamline.cm b/streamline.cm index 545a1862..16fcd8fb 100644 --- a/streamline.cm +++ b/streamline.cm @@ -250,8 +250,13 @@ var streamline = function(ir, log) { var param_types = null var i = 0 var j = 0 + var iter = 0 var instr = null var bt = null + var src = 0 + var dst = 0 + var old_bt = null + var changed = false var rule = null if (instructions == null || nr_args == 0) { @@ -275,6 +280,31 @@ var streamline = function(ir, log) { i = i + 1 } + // Propagate typed constraints backward through move chains. 
+ changed = true + iter = 0 + while (changed && iter < num_instr + 4) { + changed = false + i = 0 + while (i < num_instr) { + instr = instructions[i] + if (is_array(instr) && instr[0] == "move") { + dst = instr[1] + src = instr[2] + bt = backward_types[dst] + if (bt != null && bt != T_UNKNOWN) { + old_bt = backward_types[src] + merge_backward(backward_types, src, bt) + if (backward_types[src] != old_bt) { + changed = true + } + } + } + i = i + 1 + } + iter = iter + 1 + } + param_types = array(func.nr_slots) j = 1 while (j <= nr_args) { @@ -325,16 +355,35 @@ var streamline = function(ir, log) { is_record: [1, T_BOOL], is_stone: [1, T_BOOL] } - var infer_slot_write_types = function(func) { + // Known intrinsic return types for invoke result inference. + var intrinsic_return_types = { + abs: T_NUM, floor: T_NUM, ceiling: T_NUM, + round: T_NUM, trunc: T_NUM, fraction: T_NUM, + integer: T_NUM, sign: T_NUM, + max: T_NUM, min: T_NUM + } + + var infer_slot_write_types = function(func, param_types) { var instructions = func.instructions var nr_args = func.nr_args != null ? 
func.nr_args : 0 var num_instr = 0 var write_types = null + var frame_callee = null + var intrinsic_slots = null + var move_dests = null + var move_srcs = null var i = 0 var k = 0 + var iter = 0 var instr = null + var op = null + var src = 0 var slot = 0 + var old_typ = null + var src_typ = null var typ = null + var callee_slot = null + var changed = false var rule = null var cw_keys = null @@ -344,11 +393,62 @@ var streamline = function(ir, log) { num_instr = length(instructions) write_types = array(func.nr_slots) + frame_callee = array(func.nr_slots) + intrinsic_slots = array(func.nr_slots) + move_dests = [] + move_srcs = [] i = 0 while (i < num_instr) { instr = instructions[i] if (is_array(instr)) { - rule = write_rules[instr[0]] + op = instr[0] + if (op == "access") { + slot = instr[1] + if (slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, access_value_type(instr[2])) + } + if (is_object(instr[2]) && instr[2].make == "intrinsic") { + typ = intrinsic_return_types[instr[2].name] + if (typ != null && slot >= 0 && slot < length(intrinsic_slots)) { + intrinsic_slots[slot] = typ + } + } + i = i + 1 + continue + } + if (op == "move") { + slot = instr[1] + if (slot > 0 && slot > nr_args) { + move_dests[] = slot + move_srcs[] = instr[2] + } + i = i + 1 + continue + } + if (op == "frame" || op == "goframe") { + if (is_number(instr[1]) && instr[1] >= 0 && instr[1] < length(frame_callee)) { + frame_callee[instr[1]] = instr[2] + } + i = i + 1 + continue + } + if (op == "invoke" || op == "tail_invoke") { + slot = instr[2] + typ = T_UNKNOWN + callee_slot = frame_callee[instr[1]] + if (is_number(callee_slot) && callee_slot >= 0 && callee_slot < length(intrinsic_slots)) { + if (intrinsic_slots[callee_slot] != null) { + typ = intrinsic_slots[callee_slot] + } + } + if (slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, typ) + } + i = i + 1 + continue + } + + rule = write_rules[op] if (rule != null) { slot = instr[rule[0]] typ = rule[1] @@ -363,6 
+463,54 @@ var streamline = function(ir, log) { i = i + 1 } + // Resolve move writes from known source invariants (fixed-point). + changed = true + iter = 0 + while (changed && iter < length(write_types) + 4) { + changed = false + k = 0 + while (k < length(move_dests)) { + slot = move_dests[k] + src = move_srcs[k] + src_typ = null + if (is_number(src) && src >= 0) { + if (src < length(write_types) && write_types[src] != null) { + src_typ = write_types[src] + } else if (param_types != null && src < length(param_types) && param_types[src] != null) { + src_typ = param_types[src] + } + } + if (src_typ != null) { + old_typ = write_types[slot] + merge_backward(write_types, slot, src_typ) + if (write_types[slot] != old_typ) { + changed = true + } + } + k = k + 1 + } + iter = iter + 1 + } + + // Any remaining unresolved move write can carry arbitrary type. + k = 0 + while (k < length(move_dests)) { + slot = move_dests[k] + src = move_srcs[k] + src_typ = null + if (is_number(src) && src >= 0) { + if (src < length(write_types) && write_types[src] != null) { + src_typ = write_types[src] + } else if (param_types != null && src < length(param_types) && param_types[src] != null) { + src_typ = param_types[src] + } + } + if (src_typ == null && slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, T_UNKNOWN) + } + k = k + 1 + } + // Closure-written slots can have any type at runtime — mark unknown if (func.closure_written != null) { cw_keys = array(func.closure_written) @@ -976,6 +1124,94 @@ var streamline = function(ir, log) { return null } + // ========================================================= + // Pass: eliminate_unreachable_cfg — nop blocks not reachable + // from function entry under explicit jump control-flow. 
+ // ========================================================= + var eliminate_unreachable_cfg = function(func) { + var instructions = func.instructions + var num_instr = 0 + var disruption_pc = -1 + var label_map = null + var reachable = null + var stack = null + var sp = 0 + var idx = 0 + var tgt = null + var instr = null + var op = null + var nc = 0 + + if (instructions == null || length(instructions) == 0) { + return null + } + + num_instr = length(instructions) + if (is_number(func.disruption_pc)) { + disruption_pc = func.disruption_pc + } + label_map = {} + idx = 0 + while (idx < num_instr) { + instr = instructions[idx] + if (is_text(instr) && !starts_with(instr, "_nop_")) { + label_map[instr] = idx + } + idx = idx + 1 + } + + reachable = array(num_instr, false) + stack = [0] + if (disruption_pc > 0 && disruption_pc < num_instr) { + stack[] = disruption_pc + } + + sp = 0 + while (sp < length(stack)) { + idx = stack[sp] + sp = sp + 1 + + if (idx < 0 || idx >= num_instr || reachable[idx]) { + continue + } + reachable[idx] = true + instr = instructions[idx] + + if (!is_array(instr)) { + stack[] = idx + 1 + continue + } + + op = instr[0] + if (op == "jump") { + tgt = label_map[instr[1]] + if (is_number(tgt)) stack[] = tgt + continue + } + if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") { + tgt = label_map[instr[2]] + if (is_number(tgt)) stack[] = tgt + stack[] = idx + 1 + continue + } + if (op == "return" || op == "disrupt") { + continue + } + stack[] = idx + 1 + } + + idx = 0 + while (idx < num_instr) { + if (!reachable[idx] && is_array(instructions[idx])) { + nc = nc + 1 + instructions[idx] = "_nop_ucfg_" + text(nc) + } + idx = idx + 1 + } + + return null + } + // ========================================================= // Pass: eliminate_dead_jumps — jump to next label → nop // ========================================================= @@ -1590,51 +1826,75 @@ var streamline = function(ir, log) { var param_types = null var write_types = 
null var slot_types = null + var run_cycle = function(suffix) { + var name = null + name = "infer_param_types" + suffix + run_pass(func, name, function() { + param_types = infer_param_types(func) + return param_types + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "infer_slot_write_types" + suffix + run_pass(func, name, function() { + write_types = infer_slot_write_types(func, param_types) + return write_types + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_type_checks" + suffix + run_pass(func, name, function() { + slot_types = eliminate_type_checks(func, param_types, write_types, log) + return slot_types + }) + if (verify_fn) verify_fn(func, "after " + name) + + if (log != null && log.type_deltas != null && slot_types != null) { + log.type_deltas[] = { + fn: func.name, + cycle: suffix == "" ? 1 : 2, + param_types: param_types, + slot_types: slot_types + } + } + + name = "simplify_algebra" + suffix + run_pass(func, name, function() { + return simplify_algebra(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "simplify_booleans" + suffix + run_pass(func, name, function() { + return simplify_booleans(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_moves" + suffix + run_pass(func, name, function() { + return eliminate_moves(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_unreachable" + suffix + run_pass(func, name, function() { + return eliminate_unreachable(func) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_dead_jumps" + suffix + run_pass(func, name, function() { + return eliminate_dead_jumps(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + return null + } + if (func.instructions == null || length(func.instructions) == 0) { return null } - run_pass(func, "infer_param_types", function() { - param_types = infer_param_types(func) - return param_types - }) - if 
(verify_fn) verify_fn(func, "after infer_param_types") - run_pass(func, "infer_slot_write_types", function() { - write_types = infer_slot_write_types(func) - return write_types - }) - if (verify_fn) verify_fn(func, "after infer_slot_write_types") - run_pass(func, "eliminate_type_checks", function() { - slot_types = eliminate_type_checks(func, param_types, write_types, log) - return slot_types - }) - if (verify_fn) verify_fn(func, "after eliminate_type_checks") - if (log != null && log.type_deltas != null && slot_types != null) { - log.type_deltas[] = { - fn: func.name, - param_types: param_types, - slot_types: slot_types - } - } - run_pass(func, "simplify_algebra", function() { - return simplify_algebra(func, log) - }) - if (verify_fn) verify_fn(func, "after simplify_algebra") - run_pass(func, "simplify_booleans", function() { - return simplify_booleans(func, log) - }) - if (verify_fn) verify_fn(func, "after simplify_booleans") - run_pass(func, "eliminate_moves", function() { - return eliminate_moves(func, log) - }) - if (verify_fn) verify_fn(func, "after eliminate_moves") - run_pass(func, "eliminate_unreachable", function() { - return eliminate_unreachable(func) - }) - if (verify_fn) verify_fn(func, "after eliminate_unreachable") - run_pass(func, "eliminate_dead_jumps", function() { - return eliminate_dead_jumps(func, log) - }) - if (verify_fn) verify_fn(func, "after eliminate_dead_jumps") + + run_cycle("") return null } From 27ca008f18cd0ceaa931d2c195d239346718a289 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 21:18:18 -0600 Subject: [PATCH 6/9] lower ops directly --- mcode.cm | 94 +++++++++++++++++++ qbe_emit.cm | 185 +++++++++++++++++++++++++++++++++++++- source/mach.c | 181 ++++++++++++++++++++++++++++++++++--- source/quickjs-internal.h | 22 +++++ streamline.cm | 20 +++-- 5 files changed, 482 insertions(+), 20 deletions(-) diff --git a/mcode.cm b/mcode.cm index 8a8362a3..ebad4bf3 100644 --- a/mcode.cm +++ b/mcode.cm @@ -41,6 +41,28 @@ 
var mcode = function(ast) { length: "length" } + // Numeric intrinsic lowering maps (Tier 1 direct mcode). + var intrinsic_num_unary_ops = { + abs: "abs", + sign: "sign", + fraction: "fraction", + integer: "integer", + whole: "integer", + neg: "negate" + } + var intrinsic_num_binary_ops = { + modulo: "modulo", + remainder: "remainder", + max: "max", + min: "min" + } + var intrinsic_num_place_ops = { + floor: "floor", + ceiling: "ceiling", + round: "round", + trunc: "trunc" + } + // Compiler state var s_instructions = null var s_data = null @@ -877,6 +899,56 @@ var mcode = function(ast) { } } + // Intrinsic numeric helpers: + // preserve native intrinsic behavior for bad argument types by returning null. + var emit_intrinsic_num_unary = function(op, arg_slot) { + var dest = alloc_slot() + var t = alloc_slot() + var bad = gen_label(op + "_arg_bad") + var done = gen_label(op + "_arg_done") + emit_2("is_num", t, arg_slot) + emit_jump_cond("jump_false", t, bad) + emit_2(op, dest, arg_slot) + emit_jump(done) + emit_label(bad) + emit_1("null", dest) + emit_label(done) + return dest + } + + var emit_intrinsic_num_binary = function(op, left_slot, right_slot) { + var dest = alloc_slot() + var t0 = alloc_slot() + var t1 = alloc_slot() + var bad = gen_label(op + "_arg_bad") + var done = gen_label(op + "_arg_done") + emit_2("is_num", t0, left_slot) + emit_jump_cond("jump_false", t0, bad) + emit_2("is_num", t1, right_slot) + emit_jump_cond("jump_false", t1, bad) + emit_3(op, dest, left_slot, right_slot) + emit_jump(done) + emit_label(bad) + emit_1("null", dest) + emit_label(done) + return dest + } + + var emit_intrinsic_num_place = function(op, value_slot, place_slot) { + var dest = alloc_slot() + var t = alloc_slot() + var bad = gen_label(op + "_arg_bad") + var done = gen_label(op + "_arg_done") + emit_2("is_num", t, value_slot) + emit_jump_cond("jump_false", t, bad) + emit_3(op, dest, value_slot, place_slot) + emit_jump(done) + emit_label(bad) + emit_1("null", dest) + 
emit_label(done) + return dest + } + // Scan scope record for variable declarations var scan_scope = function() { var scope = find_scope_record(s_function_nr) @@ -1796,6 +1868,28 @@ var mcode = function(ast) { if (callee_kind == "name" && callee.intrinsic == true) { fname = callee.name nargs = args_list != null ? length(args_list) : 0 + mop = intrinsic_num_unary_ops[fname] + if (mop != null && nargs == 1) { + a0 = gen_expr(args_list[0], -1) + return emit_intrinsic_num_unary(mop, a0) + } + mop = intrinsic_num_binary_ops[fname] + if (mop != null && nargs == 2) { + a0 = gen_expr(args_list[0], -1) + a1 = gen_expr(args_list[1], -1) + return emit_intrinsic_num_binary(mop, a0, a1) + } + mop = intrinsic_num_place_ops[fname] + if (mop != null && (nargs == 1 || nargs == 2)) { + a0 = gen_expr(args_list[0], -1) + if (nargs == 2) { + a1 = gen_expr(args_list[1], -1) + } else { + a1 = alloc_slot() + emit_1("null", a1) + } + return emit_intrinsic_num_place(mop, a0, a1) + } // 1-arg type check intrinsics → direct opcode if (nargs == 1 && sensory_ops[fname] != null) { a0 = gen_expr(args_list[0], -1) diff --git a/qbe_emit.cm b/qbe_emit.cm index a5e2734d..8d103c93 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -1730,9 +1730,192 @@ var qbe_emit = function(ir, qbe, export_name) { p = fresh() lhs_d = emit_num_to_double(lhs) rhs_d = emit_num_to_double(rhs) - emit(` %${p}_rd =d call $fmod(d ${lhs_d}, d ${rhs_d})`) + emit(` %${p}_lhs_nan =w cned ${lhs_d}, ${lhs_d}`) + emit(` %${p}_rhs_nan =w cned ${rhs_d}, ${rhs_d}`) + emit(` %${p}_has_nan =w or %${p}_lhs_nan, %${p}_rhs_nan`) + emit(` jnz %${p}_has_nan, @${p}_bad, @${p}_chk0`) + emit(`@${p}_chk0`) + emit(` %${p}_rhs0 =w ceqd ${rhs_d}, d_0.0`) + emit(` jnz %${p}_rhs0, @${p}_bad, @${p}_calc`) + emit(`@${p}_calc`) + emit(` %${p}_q =d div ${lhs_d}, ${rhs_d}`) + emit(` %${p}_qf =d call $floor(d %${p}_q)`) + emit(` %${p}_m =d mul ${rhs_d}, %${p}_qf`) + emit(` %${p}_rd =d sub ${lhs_d}, %${p}_m`) emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d 
%${p}_rd)`) s_write(a1, `%${p}_r`) + emit(` jmp @${p}_done`) + emit(`@${p}_bad`) + s_write(a1, text(qbe.js_null)) + emit(`@${p}_done`) + continue + } + if (op == "remainder") { + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rhs0 =w ceqd ${rhs_d}, d_0.0`) + emit(` jnz %${p}_rhs0, @${p}_bad, @${p}_calc`) + emit(`@${p}_calc`) + emit(` %${p}_q =d div ${lhs_d}, ${rhs_d}`) + emit(` %${p}_qt =d call $trunc(d %${p}_q)`) + emit(` %${p}_m =d mul ${rhs_d}, %${p}_qt`) + emit(` %${p}_rd =d sub ${lhs_d}, %${p}_m`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(` jmp @${p}_done`) + emit(`@${p}_bad`) + s_write(a1, text(qbe.js_null)) + emit(`@${p}_done`) + continue + } + if (op == "max" || op == "min") { + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + if (op == "max") { + emit(` %${p}_take_l =w cgtd ${lhs_d}, ${rhs_d}`) + } else { + emit(` %${p}_take_l =w cltd ${lhs_d}, ${rhs_d}`) + } + emit(` jnz %${p}_take_l, @${p}_lhs, @${p}_rhs`) + emit(`@${p}_lhs`) + emit(` %${p}_rd =d copy ${lhs_d}`) + emit(` jmp @${p}_done_math`) + emit(`@${p}_rhs`) + emit(` %${p}_rd =d copy ${rhs_d}`) + emit(`@${p}_done_math`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "abs") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_rd =d call $fabs(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "sign") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_lt0 =w cltd ${lhs_d}, d_0.0`) + emit(` jnz %${p}_lt0, @${p}_neg, @${p}_chk_pos`) + emit(`@${p}_chk_pos`) + emit(` %${p}_gt0 =w cgtd ${lhs_d}, d_0.0`) + emit(` jnz %${p}_gt0, @${p}_pos, @${p}_zero`) + emit(`@${p}_neg`) + s_write(a1, 
text(-2)) + emit(` jmp @${p}_done`) + emit(`@${p}_pos`) + s_write(a1, text(2)) + emit(` jmp @${p}_done`) + emit(`@${p}_zero`) + s_write(a1, text(0)) + emit(`@${p}_done`) + continue + } + if (op == "fraction") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_ti =d call $trunc(d ${lhs_d})`) + emit(` %${p}_rd =d sub ${lhs_d}, %${p}_ti`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "integer") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_rd =d call $trunc(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "floor" || op == "ceiling" || op == "round" || op == "trunc") { + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_lhs_num =w copy ${emit_is_num_w(lhs)}`) + emit(` jnz %${p}_lhs_num, @${p}_place, @${p}_bad`) + emit(`@${p}_place`) + emit(` %${p}_t1 =l and ${rhs}, 1`) + emit(` %${p}_is_int =w ceql %${p}_t1, 0`) + emit(` jnz %${p}_is_int, @${p}_pi_int, @${p}_pi_not_int`) + emit(`@${p}_pi_int`) + emit(` %${p}_pil =l sar ${rhs}, 1`) + emit(` %${p}_piw =w copy %${p}_pil`) + emit(` jmp @${p}_pi_done`) + emit(`@${p}_pi_not_int`) + emit(` %${p}_t5 =l and ${rhs}, 31`) + emit(` %${p}_is_null =w ceql %${p}_t5, 7`) + emit(` jnz %${p}_is_null, @${p}_pi_zero, @${p}_pi_chk_bool`) + emit(`@${p}_pi_zero`) + emit(` %${p}_piw =w copy 0`) + emit(` jmp @${p}_pi_done`) + emit(`@${p}_pi_chk_bool`) + emit(` %${p}_is_bool =w ceql %${p}_t5, 3`) + emit(` jnz %${p}_is_bool, @${p}_pi_bool, @${p}_pi_chk_float`) + emit(`@${p}_pi_bool`) + emit(` %${p}_bl =l shr ${rhs}, 5`) + emit(` %${p}_bw =w copy %${p}_bl`) + emit(` %${p}_piw =w and %${p}_bw, 1`) + emit(` jmp @${p}_pi_done`) + emit(`@${p}_pi_chk_float`) + emit(` %${p}_t3 =l and ${rhs}, 7`) + emit(` %${p}_is_float =w ceql %${p}_t3, 5`) + emit(` jnz %${p}_is_float, @${p}_pi_float, 
@${p}_bad`) + emit(`@${p}_pi_float`) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_piw =w dtosi ${rhs_d}`) + emit(`@${p}_pi_done`) + emit(` %${p}_is_zero =w ceqw %${p}_piw, 0`) + emit(` jnz %${p}_is_zero, @${p}_direct, @${p}_scaled`) + emit(`@${p}_direct`) + if (op == "floor") { + emit(` %${p}_rd =d call $floor(d ${lhs_d})`) + } else if (op == "ceiling") { + emit(` %${p}_rd =d call $ceil(d ${lhs_d})`) + } else if (op == "round") { + emit(` %${p}_rd =d call $round(d ${lhs_d})`) + } else { + emit(` %${p}_rd =d call $trunc(d ${lhs_d})`) + } + emit(` jmp @${p}_store`) + emit(`@${p}_scaled`) + emit(` %${p}_pl =l extsw %${p}_piw`) + emit(` %${p}_pd =d sltof %${p}_pl`) + emit(` %${p}_negpd =d neg %${p}_pd`) + emit(` %${p}_mult =d call $pow(d d_10.0, d %${p}_negpd)`) + emit(` %${p}_sd =d mul ${lhs_d}, %${p}_mult`) + if (op == "floor") { + emit(` %${p}_sr =d call $floor(d %${p}_sd)`) + } else if (op == "ceiling") { + emit(` %${p}_sr =d call $ceil(d %${p}_sd)`) + } else if (op == "round") { + emit(` %${p}_sr =d call $round(d %${p}_sd)`) + } else { + emit(` %${p}_sr =d call $trunc(d %${p}_sd)`) + } + emit(` %${p}_rd =d div %${p}_sr, %${p}_mult`) + emit(` jmp @${p}_store`) + emit(`@${p}_bad`) + s_write(a1, text(qbe.js_null)) + emit(` jmp @${p}_done`) + emit(`@${p}_store`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "negate") { diff --git a/source/mach.c b/source/mach.c index 0f72afaa..01a1c37b 100644 --- a/source/mach.c +++ b/source/mach.c @@ -547,7 +547,7 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { return JS_NewFloat64(ctx, (double)ia / (double)ib); case MACH_MOD: if (ib == 0) return JS_NULL; - return JS_NewInt32(ctx, ia % ib); + return JS_NewFloat64(ctx, (double)ia - ((double)ib * floor((double)ia / (double)ib))); case MACH_EQ: return JS_NewBool(ctx, ia == ib); case MACH_NEQ: @@ -673,8 +673,9 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, 
JSValue a, JSValue b) { } case MACH_MOD: { if (db == 0.0) return JS_NULL; - double r = fmod(da, db); - if (!isfinite(r)) return JS_NULL; + if (isnan(da) || isnan(db)) return JS_NULL; + if (da == 0.0) return JS_NewFloat64(ctx, 0.0); + double r = da - (db * floor(da / db)); return JS_NewFloat64(ctx, r); } case MACH_POW: { @@ -704,6 +705,34 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { return JS_RaiseDisrupt(ctx, "type mismatch in binary operation"); } +static inline int mach_get_number(JSValue v, double *out) { + uint32_t tag = JS_VALUE_GET_TAG(v); + if (tag == JS_TAG_INT) { + *out = (double)JS_VALUE_GET_INT(v); + return 0; + } + if (JS_TAG_IS_FLOAT64(tag)) { + *out = JS_VALUE_GET_FLOAT64(v); + return 0; + } + return -1; +} + +static inline int mach_get_place(JSContext *ctx, JSValue v, int32_t *out) { + uint32_t tag = JS_VALUE_GET_NORM_TAG(v); + if (tag == JS_TAG_INT || tag == JS_TAG_BOOL || tag == JS_TAG_NULL || tag == JS_TAG_FLOAT64) { + return JS_ToInt32(ctx, out, v); + } + return -1; +} + +static inline double mach_apply_place(double d, int32_t place, double (*f)(double)) { + if (place == 0) + return f(d); + double mult = pow(10.0, -(double)place); + return f(d * mult) / mult; +} + #ifdef HAVE_ASAN void __asan_on_error(void) { @@ -856,6 +885,12 @@ vm_dispatch: DT(MACH_MUL), DT(MACH_DIV), DT(MACH_MOD), DT(MACH_POW), DT(MACH_NEG), + DT(MACH_REMAINDER), DT(MACH_MAX), + DT(MACH_MIN), DT(MACH_ABS), + DT(MACH_SIGN), DT(MACH_FRACTION), + DT(MACH_INTEGER), DT(MACH_FLOOR), + DT(MACH_CEILING), DT(MACH_ROUND), + DT(MACH_TRUNC), DT(MACH_EQ), DT(MACH_NEQ), DT(MACH_LT), DT(MACH_LE), DT(MACH_GT), DT(MACH_GE), @@ -1043,17 +1078,15 @@ vm_dispatch: } VM_CASE(MACH_MOD): { JSValue left = frame->slots[b], right = frame->slots[c]; - if (JS_VALUE_IS_BOTH_INT(left, right)) { - int32_t ib = JS_VALUE_GET_INT(right); - frame->slots[a] = (ib != 0) ? 
JS_NewInt32(ctx, JS_VALUE_GET_INT(left) % ib) : JS_NULL; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0 || db == 0.0 || + isnan(da) || isnan(db)) { + frame->slots[a] = JS_NULL; } else { - double da, db, r; - JS_ToFloat64(ctx, &da, left); - JS_ToFloat64(ctx, &db, right); - if (db == 0.0) { frame->slots[a] = JS_NULL; } - else { - r = fmod(da, db); - frame->slots[a] = !isfinite(r) ? JS_NULL : JS_NewFloat64(ctx, r); + if (da == 0.0) { + frame->slots[a] = JS_NewFloat64(ctx, 0.0); + } else { + frame->slots[a] = JS_NewFloat64(ctx, da - (db * floor(da / db))); } } VM_BREAK(); @@ -1077,6 +1110,116 @@ vm_dispatch: VM_BREAK(); } + VM_CASE(MACH_REMAINDER): { + JSValue left = frame->slots[b], right = frame->slots[c]; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0 || db == 0.0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, da - (trunc(da / db) * db)); + } + VM_BREAK(); + } + + VM_CASE(MACH_MAX): { + JSValue left = frame->slots[b], right = frame->slots[c]; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, da > db ? da : db); + } + VM_BREAK(); + } + + VM_CASE(MACH_MIN): { + JSValue left = frame->slots[b], right = frame->slots[c]; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, da < db ? 
da : db); + } + VM_BREAK(); + } + + VM_CASE(MACH_ABS): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, fabs(d)); + } + VM_BREAK(); + } + + VM_CASE(MACH_SIGN): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else if (d < 0) { + frame->slots[a] = JS_NewInt32(ctx, -1); + } else if (d > 0) { + frame->slots[a] = JS_NewInt32(ctx, 1); + } else { + frame->slots[a] = JS_NewInt32(ctx, 0); + } + VM_BREAK(); + } + + VM_CASE(MACH_FRACTION): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, d - trunc(d)); + } + VM_BREAK(); + } + + VM_CASE(MACH_INTEGER): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, trunc(d)); + } + VM_BREAK(); + } + + VM_CASE(MACH_FLOOR): + VM_CASE(MACH_CEILING): + VM_CASE(MACH_ROUND): + VM_CASE(MACH_TRUNC): { + JSValue v = frame->slots[b]; + JSValue pval = frame->slots[c]; + double d, r; + int32_t place = 0; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + VM_BREAK(); + } + if (!JS_IsNull(pval) && mach_get_place(ctx, pval, &place) != 0) { + frame->slots[a] = JS_NULL; + VM_BREAK(); + } + if (op == MACH_FLOOR) { + r = mach_apply_place(d, place, floor); + } else if (op == MACH_CEILING) { + r = mach_apply_place(d, place, ceil); + } else if (op == MACH_ROUND) { + r = mach_apply_place(d, place, round); + } else { + r = mach_apply_place(d, place, trunc); + } + frame->slots[a] = JS_NewFloat64(ctx, r); + VM_BREAK(); + } + /* Comparison — inline integer fast paths */ VM_CASE(MACH_EQ): { JSValue left = frame->slots[b], right = frame->slots[c]; @@ -2403,6 +2546,17 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { else if (strcmp(op, 
"modulo") == 0) { ABC3(MACH_MOD); } else if (strcmp(op, "pow") == 0) { ABC3(MACH_POW); } else if (strcmp(op, "negate") == 0) { AB2(MACH_NEG); } + else if (strcmp(op, "remainder") == 0) { ABC3(MACH_REMAINDER); } + else if (strcmp(op, "max") == 0) { ABC3(MACH_MAX); } + else if (strcmp(op, "min") == 0) { ABC3(MACH_MIN); } + else if (strcmp(op, "abs") == 0) { AB2(MACH_ABS); } + else if (strcmp(op, "sign") == 0) { AB2(MACH_SIGN); } + else if (strcmp(op, "fraction") == 0) { AB2(MACH_FRACTION); } + else if (strcmp(op, "integer") == 0) { AB2(MACH_INTEGER); } + else if (strcmp(op, "floor") == 0) { ABC3(MACH_FLOOR); } + else if (strcmp(op, "ceiling") == 0) { ABC3(MACH_CEILING); } + else if (strcmp(op, "round") == 0) { ABC3(MACH_ROUND); } + else if (strcmp(op, "trunc") == 0) { ABC3(MACH_TRUNC); } /* Typed integer comparisons */ else if (strcmp(op, "eq_int") == 0) { ABC3(MACH_EQ_INT); } else if (strcmp(op, "ne_int") == 0) { ABC3(MACH_NE_INT); } @@ -3107,4 +3261,3 @@ void JS_DumpMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue en dump_register_code(ctx, code, 0); JS_PopGCRef(ctx, &env_ref); } - diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index 48d8d483..7109ddee 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -487,6 +487,17 @@ typedef enum MachOpcode { MACH_MOD, /* R(A) = R(B) % R(C) */ MACH_POW, /* R(A) = R(B) ** R(C) */ MACH_NEG, /* R(A) = -R(B) */ + MACH_REMAINDER, /* R(A) = remainder(R(B), R(C)) */ + MACH_MAX, /* R(A) = max(R(B), R(C)) */ + MACH_MIN, /* R(A) = min(R(B), R(C)) */ + MACH_ABS, /* R(A) = abs(R(B)) */ + MACH_SIGN, /* R(A) = sign(R(B)) */ + MACH_FRACTION, /* R(A) = fraction(R(B)) */ + MACH_INTEGER, /* R(A) = integer(R(B)) */ + MACH_FLOOR, /* R(A) = floor(R(B), R(C)) */ + MACH_CEILING, /* R(A) = ceiling(R(B), R(C)) */ + MACH_ROUND, /* R(A) = round(R(B), R(C)) */ + MACH_TRUNC, /* R(A) = trunc(R(B), R(C)) */ MACH__DEAD_INC, /* reserved — was MACH_INC, never emitted */ MACH__DEAD_DEC, /* reserved — was 
MACH_DEC, never emitted */ @@ -660,6 +671,17 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = { [MACH_MOD] = "mod", [MACH_POW] = "pow", [MACH_NEG] = "neg", + [MACH_REMAINDER] = "remainder", + [MACH_MAX] = "max", + [MACH_MIN] = "min", + [MACH_ABS] = "abs", + [MACH_SIGN] = "sign", + [MACH_FRACTION] = "fraction", + [MACH_INTEGER] = "integer", + [MACH_FLOOR] = "floor", + [MACH_CEILING] = "ceiling", + [MACH_ROUND] = "round", + [MACH_TRUNC] = "trunc", [MACH__DEAD_INC] = "dead_inc", [MACH__DEAD_DEC] = "dead_dec", [MACH_EQ] = "eq", diff --git a/streamline.cm b/streamline.cm index 16fcd8fb..9fe9adb5 100644 --- a/streamline.cm +++ b/streamline.cm @@ -37,7 +37,8 @@ var streamline = function(ir, log) { var numeric_ops = { add: true, subtract: true, multiply: true, - divide: true, modulo: true, pow: true + divide: true, modulo: true, remainder: true, + max: true, min: true, pow: true } var bool_result_ops = { eq_int: true, ne_int: true, lt_int: true, gt_int: true, @@ -229,7 +230,12 @@ var streamline = function(ir, log) { add: [2, T_NUM, 3, T_NUM], subtract: [2, T_NUM, 3, T_NUM], multiply: [2, T_NUM, 3, T_NUM], divide: [2, T_NUM, 3, T_NUM], modulo: [2, T_NUM, 3, T_NUM], - pow: [2, T_NUM, 3, T_NUM], negate: [2, T_NUM], + remainder: [2, T_NUM, 3, T_NUM], max: [2, T_NUM, 3, T_NUM], + min: [2, T_NUM, 3, T_NUM], pow: [2, T_NUM, 3, T_NUM], + negate: [2, T_NUM], abs: [2, T_NUM], sign: [2, T_NUM], + fraction: [2, T_NUM], integer: [2, T_NUM], + floor: [2, T_NUM], ceiling: [2, T_NUM], + round: [2, T_NUM], trunc: [2, T_NUM], bitand: [2, T_INT, 3, T_INT], bitor: [2, T_INT, 3, T_INT], bitxor: [2, T_INT, 3, T_INT], shl: [2, T_INT, 3, T_INT], shr: [2, T_INT, 3, T_INT], ushr: [2, T_INT, 3, T_INT], @@ -332,10 +338,14 @@ var streamline = function(ir, log) { bitnot: [1, T_INT], bitand: [1, T_INT], bitor: [1, T_INT], bitxor: [1, T_INT], shl: [1, T_INT], shr: [1, T_INT], ushr: [1, T_INT], negate: [1, T_NUM], concat: [1, T_TEXT], + abs: [1, T_NUM], sign: [1, T_INT], fraction: [1, T_NUM], + 
integer: [1, T_NUM], floor: [1, T_NUM], ceiling: [1, T_NUM], + round: [1, T_NUM], trunc: [1, T_NUM], eq: [1, T_BOOL], ne: [1, T_BOOL], lt: [1, T_BOOL], le: [1, T_BOOL], gt: [1, T_BOOL], ge: [1, T_BOOL], in: [1, T_BOOL], add: [1, T_NUM], subtract: [1, T_NUM], multiply: [1, T_NUM], - divide: [1, T_NUM], modulo: [1, T_NUM], pow: [1, T_NUM], + divide: [1, T_NUM], modulo: [1, T_NUM], remainder: [1, T_NUM], + max: [1, T_NUM], min: [1, T_NUM], pow: [1, T_NUM], move: [1, T_UNKNOWN], load_field: [1, T_UNKNOWN], load_index: [1, T_UNKNOWN], load_dynamic: [1, T_UNKNOWN], pop: [1, T_UNKNOWN], get: [1, T_UNKNOWN], @@ -359,8 +369,8 @@ var streamline = function(ir, log) { var intrinsic_return_types = { abs: T_NUM, floor: T_NUM, ceiling: T_NUM, round: T_NUM, trunc: T_NUM, fraction: T_NUM, - integer: T_NUM, sign: T_NUM, - max: T_NUM, min: T_NUM + integer: T_NUM, whole: T_NUM, sign: T_NUM, + max: T_NUM, min: T_NUM, remainder: T_NUM, modulo: T_NUM } var infer_slot_write_types = function(func, param_types) { From e004b2c472e60a7d47e9089c387f59b8f0126312 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 22:37:48 -0600 Subject: [PATCH 7/9] optimize frames; remove trampoline --- build.cm | 2 +- docs/cli.md | 3 + docs/shop.md | 13 ++- docs/testing.md | 39 +++++++++ fold.cm | 2 +- internal/bootstrap.cm | 4 +- internal/engine.cm | 4 +- internal/shop.cm | 16 +++- mcode.cm | 9 ++- qbe_emit.cm | 162 +++++++++++++++++++++++++++----------- source/cell.c | 3 + source/qbe_helpers.c | 46 ++++++++++- source/quickjs-internal.h | 13 --- source/runtime.c | 93 +++++++++++++++++----- 14 files changed, 318 insertions(+), 91 deletions(-) diff --git a/build.cm b/build.cm index b233b527..400ba0f6 100644 --- a/build.cm +++ b/build.cm @@ -81,7 +81,7 @@ function content_hash(str) { } // Bump when native codegen/runtime ABI changes so stale dylibs are not reused. 
-def NATIVE_CACHE_VERSION = "native-v22" +def NATIVE_CACHE_VERSION = "native-v23" // Enable AOT ASan by creating .cell/asan_aot in the package root. function native_sanitize_flags() { diff --git a/docs/cli.md b/docs/cli.md index 1adad63d..3acf910d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -168,6 +168,9 @@ pit bench # run specific benchmark file pit bench package # benchmark a named package pit bench package # specific benchmark in a package pit bench package all # benchmark all packages +pit bench --bytecode # force bytecode-only benchmark run +pit bench --native # force native-only benchmark run +pit bench --compare # run bytecode and native side-by-side ``` Output includes median, mean, standard deviation, and percentiles for each benchmark. diff --git a/docs/shop.md b/docs/shop.md index 0a94cb16..996589b7 100644 --- a/docs/shop.md +++ b/docs/shop.md @@ -37,7 +37,12 @@ On a cache miss, the C runtime loads `boot/bootstrap.cm.mcode` (a pre-compiled s ### Cache invalidation -All caching is content-addressed by BLAKE2 hash of the source. When any source file changes, its hash changes and the old cache entry is simply never looked up again. No manual invalidation is needed. To force a full rebuild, delete `~/.pit/build/`. +Caching is content-addressed by BLAKE2 over the relevant inputs for each artifact. + +- Mach/script cache keys are source-content based. +- Native (`.dylib`) cache keys include source, host target, native mode marker, native cache version, and sanitize flags. + +When inputs change, the old cache entry is simply never looked up again. To force a full rebuild, delete `~/.pit/build/` (or run `cell --dev clean shop --build` in a dev workspace). ## Module Resolution @@ -73,7 +78,7 @@ use('gitea.pockle.world/john/renderer/sprite') ## Compilation and Caching -Every module goes through a content-addressed caching pipeline. The cache key is the BLAKE2 hash of the source content, so changing the source automatically invalidates the cache. 
+Every module goes through a content-addressed caching pipeline. Cache keys are based on the inputs that affect the output artifact, so changing any relevant input automatically invalidates the cache. ### Cache Hierarchy @@ -104,7 +109,9 @@ The build cache at `~/.pit/build/` stores ephemeral artifacts named by the BLAKE └── f3a4b5c6... # compiled dylib (checked before copying to lib/) ``` -This scheme provides automatic cache invalidation: when source changes, its hash changes, and the old cache entry is simply never looked up again. When building a dylib, the build cache is checked first — if a matching hash exists, it is copied to `lib/` without recompiling. +This scheme provides automatic cache invalidation: when an input changes, its hash changes, and the old cache entry is simply never looked up again. For native dylibs, inputs include target and native cache version in addition to source. + +When native codegen/runtime ABI changes, bump `NATIVE_CACHE_VERSION` in both `build.cm` and `internal/shop.cm` so stale native artifacts are never reused. ### Core Module Caching diff --git a/docs/testing.md b/docs/testing.md index ea7d2cfc..38e98e48 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -118,6 +118,45 @@ When a mismatch is found: MISMATCH: test_foo: result mismatch opt=42 noopt=43 ``` +## ASAN for Native AOT + +When debugging native (`shop.use_native`) crashes, there are two useful sanitizer workflows. + +### 1) AOT-only sanitizer (fastest loop) + +Enable sanitizer flags for generated native modules by creating a marker file: + +```bash +touch .cell/asan_aot +cell --dev bench --native fibonacci +``` + +This adds `-fsanitize=address -fno-omit-frame-pointer` to AOT module compilation. 
+ +Disable it with: + +```bash +rm -f .cell/asan_aot +``` + +### 2) Full runtime sanitizer (CLI + runtime + AOT) + +Build an ASAN-instrumented `cell` binary: + +```bash +meson setup build-asan -Dbuildtype=debug -Db_sanitize=address +CCACHE_DISABLE=1 meson compile -C build-asan +ASAN_OPTIONS=abort_on_error=1:detect_leaks=0 ./build-asan/cell --dev bench --native fibonacci +``` + +This catches bugs crossing the boundary between generated dylibs and runtime helpers. + +If stale native artifacts are suspected after compiler/runtime changes, clear build outputs first: + +```bash +cell --dev clean shop --build +``` + ## Fuzz Testing The fuzzer generates random self-checking programs, compiles them, and runs them through both optimized and unoptimized paths. Each generated program contains test functions that validate their own expected results, so failures catch both correctness bugs and optimizer mismatches. diff --git a/fold.cm b/fold.cm index 627060bd..a686c844 100644 --- a/fold.cm +++ b/fold.cm @@ -458,7 +458,7 @@ var fold = function(ast) { else if (k == "-") result = lv - rv else if (k == "*") result = lv * rv else if (k == "/") result = lv / rv - else if (k == "%") result = lv % rv + else if (k == "%") result = lv - (trunc(lv / rv) * rv) else if (k == "**") result = lv ** rv if (result == null) return make_null(expr) return make_number(result, expr) diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm index cd6f16b7..d2fb6590 100644 --- a/internal/bootstrap.cm +++ b/internal/bootstrap.cm @@ -11,7 +11,9 @@ var json_mod = use_embed('json') var crypto = use_embed('crypto') function content_hash(content) { - return text(crypto.blake2(content), 'h') + var data = content + if (!is_blob(data)) data = stone(blob(text(data))) + return text(crypto.blake2(data), 'h') } function cache_path(hash) { diff --git a/internal/engine.cm b/internal/engine.cm index 53ffcae6..4cee335e 100644 --- a/internal/engine.cm +++ b/internal/engine.cm @@ -35,7 +35,9 @@ var packages_path = 
shop_path ? shop_path + '/packages' : null var use_cache = {} function content_hash(content) { - return text(crypto.blake2(content), 'h') + var data = content + if (!is_blob(data)) data = stone(blob(text(data))) + return text(crypto.blake2(data), 'h') } function cache_path(hash) { diff --git a/internal/shop.cm b/internal/shop.cm index 9d6285c8..3f8310e8 100644 --- a/internal/shop.cm +++ b/internal/shop.cm @@ -434,6 +434,16 @@ function detect_host_target() { var host_target = detect_host_target() +// Must match build.cm NATIVE_CACHE_VERSION to detect stale native artifacts. +def NATIVE_CACHE_VERSION = "native-v23" + +function native_sanitize_flags() { + if (fd.is_file('.cell/asan_aot')) { + return ' -fsanitize=address -fno-omit-frame-pointer' + } + return '' +} + // Check for a native .cm dylib at the deterministic lib path // Returns a native descriptor {_native, _handle, _sym}, or null if no native dylib exists // Also checks staleness: if source has changed, the content-addressed build artifact @@ -444,6 +454,7 @@ function try_native_mod_dylib(pkg, stem) { var src = null var host = null var hash = null + var san_flags = null var tc_ext = null var build_path = null var handle = null @@ -456,7 +467,8 @@ function try_native_mod_dylib(pkg, stem) { if (fd.is_file(src_path)) { src = text(fd.slurp(src_path)) host = detect_host_target() - hash = content_hash(src + '\n' + host + '\nnative') + san_flags = native_sanitize_flags() + hash = content_hash(src + '\n' + host + '\nnative\n' + NATIVE_CACHE_VERSION + '\n' + san_flags) tc_ext = dylib_ext build_path = global_shop_path + '/build/' + hash + '.' 
+ host + tc_ext if (!fd.is_file(build_path)) return null @@ -1918,4 +1930,4 @@ Shop.use_native = function(path, package_context) { return os.native_module_load(handle, env) } -return Shop \ No newline at end of file +return Shop diff --git a/mcode.cm b/mcode.cm index ebad4bf3..dc37d395 100644 --- a/mcode.cm +++ b/mcode.cm @@ -4,7 +4,7 @@ var mcode = function(ast) { // Translation tables var binop_map = { "+": "add", "-": "subtract", "*": "multiply", "/": "divide", - "%": "modulo", "&": "bitand", "|": "bitor", "^": "bitxor", + "%": "remainder", "&": "bitand", "|": "bitor", "^": "bitxor", "<<": "shl", ">>": "shr", ">>>": "ushr", "==": "eq", "===": "eq", "!=": "ne", "!==": "ne", "<": "lt", "<=": "le", ">": "gt", ">=": "ge", @@ -24,13 +24,13 @@ var mcode = function(ast) { var binop_sym = { add: "+", subtract: "-", multiply: "*", divide: "/", - modulo: "%", pow: "**", + remainder: "%", pow: "**", lt: "<", le: "<=", gt: ">", ge: ">=" } var compound_map = { "+=": "add", "-=": "subtract", "*=": "multiply", "/=": "divide", - "%=": "modulo", "&=": "bitand", "|=": "bitor", "^=": "bitxor", + "%=": "remainder", "&=": "bitand", "|=": "bitor", "^=": "bitxor", "<<=": "shl", ">>=": "shr", ">>>=": "ushr" } @@ -673,7 +673,8 @@ var mcode = function(ast) { if (rel != null) { emit_relational(rel[0], rel[1], rel[2]) } else if (op_str == "subtract" || op_str == "multiply" || - op_str == "divide" || op_str == "modulo" || op_str == "pow") { + op_str == "divide" || op_str == "modulo" || op_str == "remainder" || + op_str == "pow") { emit_numeric_binop(op_str) } else { // Passthrough for bitwise, in, etc. diff --git a/qbe_emit.cm b/qbe_emit.cm index 8d103c93..314ad593 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -1113,6 +1113,9 @@ var qbe_emit = function(ir, qbe, export_name) { var ri = 0 var seg_num = 0 var resume_val = 0 + // Native calls should mirror MACH semantics: function calls are mediated + // by the frame dispatcher, not recursive C calls. 
+ var use_invoke_trampoline = true var j_lbl = null var j_idx = null var jt_lbl = null @@ -1139,6 +1142,10 @@ var qbe_emit = function(ir, qbe, export_name) { var floor_this_slot = 0 var floor_arg_slot = 0 var floor_dest_slot = 0 + var text_frame_slot = 0 + var text_this_slot = 0 + var text_arg_slot = 0 + var text_dest_slot = 0 // Pre-scan: count invoke/tail_invoke points to assign segment numbers. // Must skip dead code (instructions after terminators) the same way @@ -1187,15 +1194,41 @@ var qbe_emit = function(ir, qbe, export_name) { } } - if (scan_op == "invoke") { + // Keep invoke segment counting consistent with main-loop peephole: + // inline text intrinsic call sequence does not emit an invoke. + if (scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "text") { + if (si + 4 < length(instrs)) { + peek1 = instrs[si] + peek2 = instrs[si + 1] + peek3 = instrs[si + 2] + peek4 = instrs[si + 3] + peek5 = instrs[si + 4] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + text_frame_slot = peek1[1] + text_this_slot = peek2[1] + if (peek3[1] == text_frame_slot && peek3[2] == 0 && peek3[3] == text_this_slot && + peek4[1] == text_frame_slot && peek4[2] == 1 && + peek5[1] == text_frame_slot && peek5[2] == text_this_slot) { + si = si + 5 + continue + } + } + } + } + + if (use_invoke_trampoline && (scan_op == "invoke" || scan_op == "tail_invoke")) { invoke_count = invoke_count + 1 } // Track terminators — same set as in the main loop - if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "tail_invoke" || scan_op == "disrupt") { + if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "disrupt") { scan_dead = true } } - has_invokes = invoke_count > 0 + has_invokes 
= use_invoke_trampoline && invoke_count > 0 // Function signature: (ctx, frame_ptr) → JSValue emit(`export function l $${name}(l %ctx, l %fp) {`) @@ -1524,6 +1557,39 @@ var qbe_emit = function(ir, qbe, export_name) { } } + // Peephole: inline `text(x)` intrinsic call sequence + // access text; frame; null this; setarg 0 this; setarg 1 x; invoke + if (op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "text") { + if (instr_idx + 5 < length(instrs)) { + peek1 = instrs[instr_idx + 1] + peek2 = instrs[instr_idx + 2] + peek3 = instrs[instr_idx + 3] + peek4 = instrs[instr_idx + 4] + peek5 = instrs[instr_idx + 5] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + text_frame_slot = peek1[1] + text_this_slot = peek2[1] + if (peek3[1] == text_frame_slot && peek3[2] == 0 && peek3[3] == text_this_slot && + peek4[1] == text_frame_slot && peek4[2] == 1 && + peek5[1] == text_frame_slot && peek5[2] == text_this_slot) { + text_arg_slot = peek4[3] + text_dest_slot = peek5[2] + v = s_read(text_arg_slot) + p = fresh() + emit(` %${p}_r =l call $JS_CellText(l %ctx, l ${v})`) + refresh_fp() + s_write(text_dest_slot, `%${p}_r`) + i = instr_idx + 6 + continue + } + } + } + } + // --- Constants --- if (op == "int") { @@ -2416,42 +2482,33 @@ var qbe_emit = function(ir, qbe, export_name) { emit(` storel ${lhs}, %${p}_slot`) continue } - if (op == "invoke") { - // Dispatch loop invoke: store resume info, signal, return 0 - seg_counter = seg_counter + 1 - seg_num = seg_counter - // Store (seg_num << 16 | result_slot) as tagged int in frame->address - resume_val = seg_num * 65536 + a2 - // frame->address is at fp - 8, store as tagged int (n << 1) - emit(` %_inv_addr${text(seg_num)} =l sub %fp, 8`) - emit(` storel ${text(resume_val * 2)}, %_inv_addr${text(seg_num)}`) 
- emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) - emit(" ret 0") - emit(`@_seg${text(seg_num)}`) - // Check for exception marker in destination slot after resume. - // Dispatch writes JS_EXCEPTION into ret_slot on exceptional return. - rv = s_read(a2) - p = fresh() - emit(` %${p} =w ceql ${rv}, ${text(qbe.js_exception)}`) - if (has_handler && !in_handler) { - emit(` jnz %${p}, @disruption_handler, @${p}_ok`) + if (op == "invoke" || op == "tail_invoke") { + if (use_invoke_trampoline) { + // Signal dispatcher to call frame in slot a1 and resume at @_segN. + seg_counter = seg_counter + 1 + resume_val = seg_counter * 65536 + a2 + p = fresh() + emit(` %${p}_addrp =l sub %fp, 8`) + // frame->address holds JS_NewInt32((seg << 16) | ret_slot), tagged. + emit(` storel ${text(resume_val * 2)}, %${p}_addrp`) + emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) + emit(` ret ${text(qbe.js_null)}`) + emit(`@_seg${text(seg_counter)}`) + // Dispatcher writes JS_EXCEPTION into ret slot on error; branch here. + v = s_read(a2) + emit(` %${p}_exc =w ceql ${v}, ${text(qbe.js_exception)}`) + if (has_handler && !in_handler) { + emit(` jnz %${p}_exc, @disruption_handler, @${p}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${p}_exc, @_exc_ret, @${p}_ok`) + } + emit(`@${p}_ok`) } else { - needs_exc_ret = true - emit(` jnz %${p}, @_exc_ret, @${p}_ok`) + // Direct helper invoke path (disabled by default). + emit(` %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) + emit_exc_check() } - emit(`@${p}_ok`) - last_was_term = false - continue - } - if (op == "tail_invoke") { - // Tail call: hand control to dispatch loop and do not resume this segment. - // Use 0xFFFF as ret_slot (no result writeback into current frame). 
- p = fresh() - emit(` %${p}_addr =l sub %fp, 8`) - emit(` storel ${text(65535 * 2)}, %${p}_addr`) - emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) - emit(" ret 0") - last_was_term = true continue } if (op == "goframe") { @@ -2460,13 +2517,30 @@ var qbe_emit = function(ir, qbe, export_name) { continue } if (op == "goinvoke") { - // Dispatch loop tail call: signal tail call and return 0 - // Use 0xFFFF as ret_slot (no result to store — it's a tail call) - p = fresh() - emit(` %${p}_addr =l sub %fp, 8`) - emit(` storel ${text(65535 * 2)}, %${p}_addr`) - emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) - emit(" ret 0") + if (use_invoke_trampoline) { + // Tail call via dispatcher: no resume in this frame. + emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) + emit(` ret ${text(qbe.js_null)}`) + } else { + // Direct helper goinvoke path (disabled by default). + v = s_read(a1) + p = fresh() + emit(` %${p}_r =l call $cell_rt_goinvoke(l %ctx, l ${v})`) + emit(` %${p}_exc =w ceql %${p}_r, ${text(qbe.js_exception)}`) + if (has_handler && !in_handler) { + emit(` jnz %${p}_exc, @${p}_exc, @${p}_ok`) + emit(`@${p}_exc`) + emit(` %fp =l call $cell_rt_refresh_fp(l %ctx)`) + emit(` jmp @disruption_handler`) + emit(`@${p}_ok`) + emit(` ret %${p}_r`) + } else { + needs_exc_ret = true + emit(` jnz %${p}_exc, @_exc_ret, @${p}_ok`) + emit(`@${p}_ok`) + emit(` ret %${p}_r`) + } + } last_was_term = true continue } diff --git a/source/cell.c b/source/cell.c index a8ae53f5..0bb7d4c2 100644 --- a/source/cell.c +++ b/source/cell.c @@ -37,6 +37,7 @@ static char *compute_blake2_hex(const char *data, size_t size) { uint8_t hash[32]; crypto_blake2b(hash, 32, (const uint8_t *)data, size); char *hex = malloc(65); + if (!hex) return NULL; for (int i = 0; i < 32; i++) snprintf(hex + i * 2, 3, "%02x", hash[i]); return hex; @@ -64,6 +65,7 @@ static int write_cache_file(const char *path, const uint8_t *data, size_t size) // Returns 
heap-allocated binary data and sets *out_size, or NULL on failure static char *load_or_cache_bootstrap(const char *mcode_data, size_t mcode_size, size_t *out_size) { char *hex = compute_blake2_hex(mcode_data, mcode_size); + if (!hex) return NULL; char *cpath = build_cache_path(hex); free(hex); @@ -222,6 +224,7 @@ static char *try_engine_cache(size_t *out_size) { char *hex = compute_blake2_hex(src, src_size); free(src); + if (!hex) return NULL; char *cpath = build_cache_path(hex); if (!cpath) { free(hex); return NULL; } free(hex); diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 8d638a61..e12309c3 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -583,11 +583,13 @@ void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth, #define AOT_GC_REF_CHUNK_SIZE 1024 typedef struct AOTGCRefChunk { JSGCRef refs[AOT_GC_REF_CHUNK_SIZE]; + uint8_t inited[AOT_GC_REF_CHUNK_SIZE]; } AOTGCRefChunk; static CELL_THREAD_LOCAL AOTGCRefChunk **g_aot_gc_ref_chunks = NULL; static CELL_THREAD_LOCAL int g_aot_gc_ref_chunk_count = 0; static CELL_THREAD_LOCAL int g_aot_depth = 0; +static CELL_THREAD_LOCAL JSContext *g_aot_gc_ref_ctx = NULL; int cell_rt_native_active(void) { return g_aot_depth > 0; @@ -624,14 +626,50 @@ static inline JSGCRef *aot_gc_ref_at(int depth_index) { return &g_aot_gc_ref_chunks[chunk_index]->refs[slot_index]; } +static inline uint8_t *aot_gc_ref_inited_at(int depth_index) { + int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; + int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; + return &g_aot_gc_ref_chunks[chunk_index]->inited[slot_index]; +} + +/* GC refs are owned by a specific JSContext. If context changes on this thread, + unregister previous refs and reset per-slot initialization state. 
*/ +static void aot_gc_ref_reset_ctx(JSContext *ctx) { + if (g_aot_gc_ref_ctx == ctx) + return; + if (g_aot_gc_ref_ctx) { + for (int ci = 0; ci < g_aot_gc_ref_chunk_count; ci++) { + AOTGCRefChunk *chunk = g_aot_gc_ref_chunks[ci]; + for (int si = 0; si < AOT_GC_REF_CHUNK_SIZE; si++) { + if (chunk->inited[si]) { + JS_DeleteGCRef(g_aot_gc_ref_ctx, &chunk->refs[si]); + chunk->inited[si] = 0; + chunk->refs[si].val = JS_NULL; + } + } + } + } + g_aot_gc_ref_ctx = ctx; +} + +static inline void aot_gc_ref_activate(JSContext *ctx, int depth_index) { + JSGCRef *ref = aot_gc_ref_at(depth_index); + uint8_t *inited = aot_gc_ref_inited_at(depth_index); + if (!*inited) { + JS_AddGCRef(ctx, ref); + *inited = 1; + } +} + JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { + aot_gc_ref_reset_ctx(ctx); if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) { return NULL; } JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots); if (!frame) return NULL; + aot_gc_ref_activate(ctx, g_aot_depth); JSGCRef *ref = aot_gc_ref_at(g_aot_depth); - JS_AddGCRef(ctx, ref); ref->val = JS_MKPTR(frame); g_aot_depth++; return (JSValue *)frame->slots; @@ -639,10 +677,11 @@ JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { /* Push an already-allocated frame onto the active AOT frame stack. 
*/ static int cell_rt_push_existing_frame(JSContext *ctx, JSValue frame_val) { + aot_gc_ref_reset_ctx(ctx); if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) return 0; + aot_gc_ref_activate(ctx, g_aot_depth); JSGCRef *ref = aot_gc_ref_at(g_aot_depth); - JS_AddGCRef(ctx, ref); ref->val = frame_val; g_aot_depth++; return 1; @@ -682,12 +721,13 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { } void cell_rt_leave_frame(JSContext *ctx) { + (void)ctx; if (g_aot_depth <= 0) { fprintf(stderr, "[BUG] cell_rt_leave_frame underflow\n"); abort(); } g_aot_depth--; - JS_DeleteGCRef(ctx, aot_gc_ref_at(g_aot_depth)); + aot_gc_ref_at(g_aot_depth)->val = JS_NULL; } /* --- Function creation and calling --- */ diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index 7109ddee..04b0a512 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -1426,8 +1426,6 @@ static JSValue js_cell_splat (JSContext *ctx, JSValue this_val, int argc, JSValu static JSValue js_cell_meme (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_fn_apply (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_call (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_modulo (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_neg (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_not (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); JSValue js_cell_text_lower (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); JSValue js_cell_text_upper (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); @@ -1438,17 +1436,6 @@ static JSValue js_cell_text_search (JSContext *ctx, JSValue this_val, int argc, static JSValue js_cell_text_extract (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); JSValue js_cell_character (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue 
js_cell_number (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_abs (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_sign (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_floor (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_ceiling (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_round (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_trunc (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_whole (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_fraction (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_min (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_max (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_remainder (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_object (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_text_format (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_print (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); diff --git a/source/runtime.c b/source/runtime.c index 038c9457..6967c5b1 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -10514,15 +10514,46 @@ JSValue JS_CellCall (JSContext *ctx, JSValue fn, JSValue this_val, JSValue args) return js_cell_call (ctx, JS_NULL, argc, argv); } +static int js_cell_read_number_strict (JSValue val, double *out) { + uint32_t tag = JS_VALUE_GET_TAG (val); + if (tag == JS_TAG_INT) { + *out = (double)JS_VALUE_GET_INT (val); + return 0; + } + if (JS_TAG_IS_FLOAT64 (tag)) { + *out = JS_VALUE_GET_FLOAT64 (val); + return 0; + } + return 
-1; +} + +static JSValue js_cell_number_from_double (JSContext *ctx, double d) { + if (d >= INT32_MIN && d <= INT32_MAX) { + int32_t i = (int32_t)d; + if ((double)i == d) + return JS_NewInt32 (ctx, i); + } + return JS_NewFloat64 (ctx, d); +} + /* C API: modulo(a, b) - modulo operation */ JSValue JS_CellModulo (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_modulo (ctx, JS_NULL, 2, argv); + double dividend, divisor; + if (js_cell_read_number_strict (a, &dividend) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &divisor) < 0) return JS_NULL; + if (isnan (dividend) || isnan (divisor)) return JS_NULL; + if (divisor == 0.0) return JS_NULL; + if (dividend == 0.0) return JS_NewFloat64 (ctx, 0.0); + return js_cell_number_from_double (ctx, + dividend - (divisor * floor (dividend / divisor))); } /* C API: neg(val) - negate number */ JSValue JS_CellNeg (JSContext *ctx, JSValue val) { - return js_cell_neg (ctx, JS_NULL, 1, &val); + double d; + if (js_cell_read_number_strict (val, &d) < 0) return JS_NULL; + if (isnan (d)) return JS_NULL; + return js_cell_number_from_double (ctx, -d); } /* C API: not(val) - logical not */ @@ -10647,60 +10678,86 @@ JSValue JS_CellNumber (JSContext *ctx, JSValue val) { /* C API: abs(num) - absolute value */ JSValue JS_CellAbs (JSContext *ctx, JSValue num) { - return js_cell_number_abs (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, fabs (d)); } /* C API: sign(num) - sign of number (-1, 0, 1) */ JSValue JS_CellSign (JSContext *ctx, JSValue num) { - return js_cell_number_sign (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + if (d < 0) return JS_NewInt32 (ctx, -1); + if (d > 0) return JS_NewInt32 (ctx, 1); + return JS_NewInt32 (ctx, 0); } /* C API: floor(num) - floor */ JSValue JS_CellFloor (JSContext *ctx, JSValue num) { - return js_cell_number_floor (ctx,
JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, floor (d)); } /* C API: ceiling(num) - ceiling */ JSValue JS_CellCeiling (JSContext *ctx, JSValue num) { - return js_cell_number_ceiling (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, ceil (d)); } /* C API: round(num) - round to nearest integer */ JSValue JS_CellRound (JSContext *ctx, JSValue num) { - return js_cell_number_round (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, round (d)); } /* C API: trunc(num) - truncate towards zero */ JSValue JS_CellTrunc (JSContext *ctx, JSValue num) { - return js_cell_number_trunc (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, trunc (d)); } /* C API: whole(num) - integer part */ JSValue JS_CellWhole (JSContext *ctx, JSValue num) { - return js_cell_number_whole (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, trunc (d)); } /* C API: fraction(num) - fractional part */ JSValue JS_CellFraction (JSContext *ctx, JSValue num) { - return js_cell_number_fraction (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, d - trunc (d)); } /* C API: min(a, b) - minimum of two numbers */ JSValue JS_CellMin (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_number_min (ctx, JS_NULL, 2, argv); + double da, db; + if (js_cell_read_number_strict (a, &da) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &db) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, da < db ? 
da : db); } /* C API: max(a, b) - maximum of two numbers */ JSValue JS_CellMax (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_number_max (ctx, JS_NULL, 2, argv); + double da, db; + if (js_cell_read_number_strict (a, &da) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &db) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, da > db ? da : db); } /* C API: remainder(a, b) - remainder after division */ JSValue JS_CellRemainder (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_number_remainder (ctx, JS_NULL, 2, argv); + double dividend, divisor; + if (js_cell_read_number_strict (a, &dividend) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &divisor) < 0) return JS_NULL; + if (divisor == 0.0) return JS_NULL; + return js_cell_number_from_double (ctx, + dividend - (trunc (dividend / divisor) * divisor)); } /* Object functions */ @@ -11374,7 +11431,7 @@ static void JS_AddIntrinsicBaseObjects (JSContext *ctx) { js_set_global_cfunc(ctx, "filter", js_cell_array_filter, 2); js_set_global_cfunc(ctx, "sort", js_cell_array_sort, 2); - /* Number utility functions */ + /* Number intrinsics: direct calls lower to mcode; globals remain for first-class use.
*/ js_set_global_cfunc(ctx, "whole", js_cell_number_whole, 1); js_set_global_cfunc(ctx, "fraction", js_cell_number_fraction, 1); js_set_global_cfunc(ctx, "floor", js_cell_number_floor, 2); From 19132c1517ea1801ff1739d6c5ab929fad64a1f8 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Thu, 19 Feb 2026 00:33:16 -0600 Subject: [PATCH 8/9] jscode --- source/mach.c | 141 +++++++++++++++++++++++++++----------- source/qbe_helpers.c | 30 ++++---- source/quickjs-internal.h | 36 +++++++--- source/runtime.c | 38 +++++----- 4 files changed, 161 insertions(+), 84 deletions(-) diff --git a/source/mach.c b/source/mach.c index 01a1c37b..ea08beda 100644 --- a/source/mach.c +++ b/source/mach.c @@ -460,6 +460,33 @@ JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count) { return frame; } +static JSValue js_new_register_code(JSContext *ctx, JSCodeRegister *code) { + JSCode *jc; + if (!code) return JS_EXCEPTION; + jc = ct_alloc(ctx, sizeof(JSCode), 8); + if (!jc) return JS_EXCEPTION; + memset(jc, 0, sizeof(JSCode)); + jc->header = objhdr_make(0, OBJ_CODE, 0, 0, 0, 0); + jc->kind = JS_CODE_KIND_REGISTER; + jc->arity = (int16_t)code->arity; + jc->u.reg.code = code; + return JS_MKPTR(jc); +} + +static JSValue js_new_native_code(JSContext *ctx, void *fn_ptr, void *dl_handle, + uint16_t nr_slots, int arity) { + JSCode *jc = ct_alloc(ctx, sizeof(JSCode), 8); + if (!jc) return JS_EXCEPTION; + memset(jc, 0, sizeof(JSCode)); + jc->header = objhdr_make(0, OBJ_CODE, 0, 0, 0, 0); + jc->kind = JS_CODE_KIND_NATIVE; + jc->arity = (int16_t)arity; + jc->u.native.fn_ptr = fn_ptr; + jc->u.native.dl_handle = dl_handle; + jc->u.native.nr_slots = nr_slots; + return JS_MKPTR(jc); +} + /* Create a register-based function from JSCodeRegister */ JSValue js_new_register_function(JSContext *ctx, JSCodeRegister *code, JSValue env, JSValue outer_frame) { /* Protect env and outer_frame from GC — js_mallocz can trigger @@ -470,50 +497,84 @@ JSValue js_new_register_function(JSContext *ctx, 
JSCodeRegister *code, JSValue e JS_PushGCRef(ctx, &frame_ref); frame_ref.val = outer_frame; - JSFunction *fn = js_mallocz(ctx, sizeof(JSFunction)); + JSGCRef fn_ref; + JSFunction *fn; + JSValue code_obj; + + JS_AddGCRef(ctx, &fn_ref); + fn_ref.val = JS_NULL; + + fn = js_mallocz(ctx, sizeof(JSFunction)); if (!fn) { + JS_DeleteGCRef(ctx, &fn_ref); JS_PopGCRef(ctx, &frame_ref); JS_PopGCRef(ctx, &env_ref); return JS_EXCEPTION; } + fn_ref.val = JS_MKPTR(fn); fn->header = objhdr_make(0, OBJ_FUNCTION, 0, 0, 0, 0); fn->kind = JS_FUNC_KIND_REGISTER; fn->length = code->arity; fn->name = code->name; - fn->u.reg.code = code; - fn->u.reg.env_record = env_ref.val; - fn->u.reg.outer_frame = frame_ref.val; + code_obj = js_new_register_code(ctx, code); + if (JS_IsException(code_obj)) { + JS_DeleteGCRef(ctx, &fn_ref); + JS_PopGCRef(ctx, &frame_ref); + JS_PopGCRef(ctx, &env_ref); + return JS_EXCEPTION; + } + fn = JS_VALUE_GET_FUNCTION(fn_ref.val); + fn->u.cell.code = code_obj; + fn->u.cell.env_record = env_ref.val; + fn->u.cell.outer_frame = frame_ref.val; + JSValue out = fn_ref.val; + JS_DeleteGCRef(ctx, &fn_ref); JS_PopGCRef(ctx, &frame_ref); JS_PopGCRef(ctx, &env_ref); - return JS_MKPTR(fn); + return out; } /* Create a native (QBE-compiled) function */ JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, uint16_t nr_slots, int arity, JSValue outer_frame) { JSGCRef frame_ref; + JSGCRef fn_ref; + JSFunction *fn; + JSValue code_obj; JS_PushGCRef(ctx, &frame_ref); frame_ref.val = outer_frame; + JS_AddGCRef(ctx, &fn_ref); + fn_ref.val = JS_NULL; - JSFunction *fn = js_mallocz(ctx, sizeof(JSFunction)); + fn = js_mallocz(ctx, sizeof(JSFunction)); if (!fn) { + JS_DeleteGCRef(ctx, &fn_ref); JS_PopGCRef(ctx, &frame_ref); return JS_EXCEPTION; } + fn_ref.val = JS_MKPTR(fn); fn->header = objhdr_make(0, OBJ_FUNCTION, 0, 0, 0, 0); fn->kind = JS_FUNC_KIND_NATIVE; fn->length = arity; fn->name = JS_NULL; - fn->u.native.fn_ptr = fn_ptr; - fn->u.native.dl_handle = dl_handle; - 
fn->u.native.nr_slots = nr_slots; - fn->u.native.outer_frame = frame_ref.val; + code_obj = js_new_native_code(ctx, fn_ptr, dl_handle, nr_slots, arity); + if (JS_IsException(code_obj)) { + JS_DeleteGCRef(ctx, &fn_ref); + JS_PopGCRef(ctx, &frame_ref); + return JS_EXCEPTION; + } + fn = JS_VALUE_GET_FUNCTION(fn_ref.val); + fn->u.cell.code = code_obj; + fn->u.cell.env_record = JS_NULL; + fn->u.cell.outer_frame = frame_ref.val; + JSValue out = fn_ref.val; + JS_DeleteGCRef(ctx, &fn_ref); JS_PopGCRef(ctx, &frame_ref); - return JS_MKPTR(fn); + return out; } /* Binary operations helper */ @@ -750,8 +811,8 @@ void __asan_on_error(void) { const char *file = NULL; uint16_t line = 0; uint32_t pc = is_first ? cur_pc : 0; - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - JSCodeRegister *code = fn->u.reg.code; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + JSCodeRegister *code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; file = code->filename_cstr; func_name = code->name_cstr; if (!is_first) @@ -787,8 +848,8 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, ctx->suspended_frame_ref.val = JS_NULL; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; - env = fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; + env = fn->u.cell.env_record; pc = ctx->suspended_pc; result = JS_NULL; #ifdef HAVE_ASAN @@ -1499,7 +1560,7 @@ vm_dispatch: /* Read env fresh from frame->function — C local env can go stale after GC */ int bx = MACH_GET_Bx(instr); JSValue key = code->cpool[bx]; - JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.reg.env_record; + JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.cell.env_record; JSValue val = JS_GetProperty(ctx, cur_env, key); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); frame->slots[a] = val; @@ -1511,7 +1572,7 @@ vm_dispatch: int 
bx = MACH_GET_Bx(instr); JSValue key = code->cpool[bx]; JSValue val = JS_NULL; - JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.reg.env_record; + JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.cell.env_record; if (!JS_IsNull(cur_env)) { val = JS_GetProperty(ctx, cur_env, key); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); @@ -1528,7 +1589,7 @@ vm_dispatch: /* R(A) = outer_frame[B].slots[C] — walk lexical scope chain */ int depth = b; JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); if (!target) { fprintf(stderr, "GETUP: NULL outer_frame at depth 0! pc=%d a=%d depth=%d slot=%d nr_slots=%d instr=0x%08x\n", pc-1, a, depth, c, code->nr_slots, instr); @@ -1537,7 +1598,7 @@ vm_dispatch: } for (int d = 1; d < depth; d++) { fn = JS_VALUE_GET_FUNCTION(target->function); - JSFrameRegister *next = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + JSFrameRegister *next = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); if (!next) { fprintf(stderr, "GETUP: NULL outer_frame at depth %d! 
pc=%d a=%d depth=%d slot=%d nr_slots=%d instr=0x%08x\n", d, pc-1, a, depth, c, code->nr_slots, instr); @@ -1554,10 +1615,10 @@ vm_dispatch: /* outer_frame[B].slots[C] = R(A) — walk lexical scope chain */ int depth = b; JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); for (int d = 1; d < depth; d++) { fn = JS_VALUE_GET_FUNCTION(target->function); - target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); } target->slots[c] = frame->slots[a]; VM_BREAK(); @@ -1651,9 +1712,9 @@ vm_dispatch: const char *callee_file = "?"; { JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(frame->function); - if (callee_fn->kind == JS_FUNC_KIND_REGISTER && callee_fn->u.reg.code) { - if (callee_fn->u.reg.code->name_cstr) callee_name = callee_fn->u.reg.code->name_cstr; - if (callee_fn->u.reg.code->filename_cstr) callee_file = callee_fn->u.reg.code->filename_cstr; + if (callee_fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code) { + if (JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->name_cstr) callee_name = JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->name_cstr; + if (JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->filename_cstr) callee_file = JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->filename_cstr; } } #endif @@ -1663,8 +1724,8 @@ vm_dispatch: frame_ref.val = JS_MKPTR(frame); int ret_info = JS_VALUE_GET_INT(frame->address); JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; - env = fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; + env = fn->u.cell.env_record; pc = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; if (ret_slot != 0xFFFF) { @@ -1696,8 +1757,8 @@ vm_dispatch: frame_ref.val = 
JS_MKPTR(frame); int ret_info = JS_VALUE_GET_INT(frame->address); JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; - env = fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; + env = fn->u.cell.env_record; pc = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; if (ret_slot != 0xFFFF) frame->slots[ret_slot] = result; @@ -1725,7 +1786,7 @@ vm_dispatch: if ((uint32_t)bx < code->func_count) { JSCodeRegister *fn_code = code->functions[bx]; /* Read env fresh from frame->function — C local can be stale */ - JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.reg.env_record; + JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.cell.env_record; JSValue fn_val = js_new_register_function(ctx, fn_code, cur_env, frame_ref.val); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); frame->slots[a] = fn_val; @@ -2112,7 +2173,7 @@ vm_dispatch: if (fn->kind == JS_FUNC_KIND_REGISTER) { /* Register function: switch frames inline (fast path) */ - JSCodeRegister *fn_code = fn->u.reg.code; + JSCodeRegister *fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots); if (!new_frame) { frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); @@ -2122,7 +2183,7 @@ vm_dispatch: fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]); fn_val = fr->function; fn = JS_VALUE_GET_FUNCTION(fn_val); - fn_code = fn->u.reg.code; + fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; new_frame->function = fn_val; /* Copy this + args from call frame to new frame */ int copy_count = (c_argc < fn_code->arity) ? 
c_argc : fn_code->arity; @@ -2135,7 +2196,7 @@ vm_dispatch: frame = new_frame; frame_ref.val = JS_MKPTR(frame); code = fn_code; - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->entry_point; } else { /* C, native, or bytecode function */ @@ -2182,7 +2243,7 @@ vm_dispatch: JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val); if (fn->kind == JS_FUNC_KIND_REGISTER) { - JSCodeRegister *fn_code = fn->u.reg.code; + JSCodeRegister *fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; int current_slots = (int)objhdr_cap56(frame->header); if (fn_code->nr_slots <= current_slots) { @@ -2197,7 +2258,7 @@ vm_dispatch: frame->function = fn_val; /* caller stays the same — we're reusing this frame */ code = fn_code; - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->entry_point; } else { /* SLOW PATH: callee needs more slots, must allocate */ @@ -2210,7 +2271,7 @@ vm_dispatch: fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]); fn_val = fr->function; fn = JS_VALUE_GET_FUNCTION(fn_val); - fn_code = fn->u.reg.code; + fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; new_frame->function = fn_val; int copy_count = (c_argc < fn_code->arity) ? 
c_argc : fn_code->arity; new_frame->slots[0] = fr->slots[0]; /* this */ @@ -2221,7 +2282,7 @@ vm_dispatch: frame = new_frame; frame_ref.val = JS_MKPTR(frame); code = fn_code; - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->entry_point; } } else { @@ -2249,8 +2310,8 @@ vm_dispatch: frame_ref.val = JS_MKPTR(frame); int ret_info = JS_VALUE_GET_INT(frame->address); JSFunction *ret_fn = JS_VALUE_GET_FUNCTION(frame->function); - code = ret_fn->u.reg.code; - env = ret_fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(ret_fn->u.cell.code)->u.reg.code; + env = ret_fn->u.cell.env_record; pc = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; if (ret_slot != 0xFFFF) frame->slots[ret_slot] = ret; @@ -2302,10 +2363,10 @@ vm_dispatch: uint32_t frame_pc = pc; for (;;) { JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; /* Only enter handler if we're not already inside it */ if (code->disruption_pc > 0 && frame_pc < code->disruption_pc) { - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->disruption_pc; ctx->disruption_reported = FALSE; frame_ref.val = JS_MKPTR(frame); /* root handler frame for GC */ diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index e12309c3..9f96f2b9 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -544,7 +544,7 @@ static JSValue *get_outer_frame_slots(JSValue *fp) { JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); if (fn->kind != JS_FUNC_KIND_NATIVE) return NULL; - JSValue outer = fn->u.native.outer_frame; + JSValue outer = fn->u.cell.outer_frame; if (JS_IsNull(outer)) return NULL; JSFrameRegister *outer_frame = (JSFrameRegister *)JS_VALUE_GET_PTR(outer); @@ -780,11 +780,11 @@ void cell_rt_signal_tail_call(JSContext *ctx, void *fp, int64_t frame_slot) { JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv) { JSFunction *f = 
JS_VALUE_GET_FUNCTION(func_obj); - cell_compiled_fn fn = (cell_compiled_fn)f->u.native.fn_ptr; - int nr_slots = f->u.native.nr_slots; + cell_compiled_fn fn = (cell_compiled_fn)JS_VALUE_GET_CODE(f->u.cell.code)->u.native.fn_ptr; + int nr_slots = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots; int arity = f->length; void *prev_dl_handle = g_current_dl_handle; - g_current_dl_handle = f->u.native.dl_handle; + g_current_dl_handle = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.dl_handle; #define RETURN_DISPATCH(v) \ do { \ @@ -832,7 +832,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (JS_IsFunction(frame->function)) { JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); if (cur_fn->kind == JS_FUNC_KIND_NATIVE) - g_current_dl_handle = cur_fn->u.native.dl_handle; + g_current_dl_handle = JS_VALUE_GET_CODE(cur_fn->u.cell.code)->u.native.dl_handle; } JSValue result = fn(ctx, fp); @@ -865,7 +865,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JS_RaiseDisrupt(ctx, "not a function"); /* Resume caller with exception pending */ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_fn->u.cell.code)->u.native.fn_ptr; JS_PopGCRef(ctx, &callee_ref); continue; } @@ -877,7 +877,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (callee_fn->kind == JS_FUNC_KIND_NATIVE) { /* Native-to-native call — no C stack growth */ - cell_compiled_fn callee_ptr = (cell_compiled_fn)callee_fn->u.native.fn_ptr; + cell_compiled_fn callee_ptr = (cell_compiled_fn)JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.native.fn_ptr; if (pending_is_tail) { /* Tail call: replace current frame with the prepared callee frame. 
*/ @@ -921,7 +921,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_fn->u.cell.code)->u.native.fn_ptr; JS_PopGCRef(ctx, &callee_fn_ref); JS_PopGCRef(ctx, &callee_ref); continue; @@ -959,7 +959,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* fn and fp still point to the calling native function's frame. Just resume it — it will detect JS_EXCEPTION in the return slot. */ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_fn->u.cell.code)->u.native.fn_ptr; JS_PopGCRef(ctx, &callee_ref); continue; } @@ -990,7 +990,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fp[ret_slot] = ret; /* Resume caller */ JSFunction *caller_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)caller_fn->u.native.fn_ptr; + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(caller_fn->u.cell.code)->u.native.fn_ptr; } else { /* Regular call: store result and resume current function */ int ret_info = JS_VALUE_GET_INT(frame->address); @@ -999,7 +999,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fp[ret_slot] = ret; /* fn stays the same — we resume the same function at next segment */ JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)cur_fn->u.native.fn_ptr; + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(cur_fn->u.cell.code)->u.native.fn_ptr; } } JS_PopGCRef(ctx, &callee_fn_ref); @@ -1033,7 +1033,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fp[ret_slot] = JS_EXCEPTION; JSFunction *exc_caller_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)exc_caller_fn->u.native.fn_ptr; + fn = 
(cell_compiled_fn)JS_VALUE_GET_CODE(exc_caller_fn->u.cell.code)->u.native.fn_ptr; continue; } @@ -1057,7 +1057,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fp[ret_slot] = result; JSFunction *caller_fn = JS_VALUE_GET_FUNCTION(frame->function); - fn = (cell_compiled_fn)caller_fn->u.native.fn_ptr; + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(caller_fn->u.cell.code)->u.native.fn_ptr; continue; } @@ -1098,8 +1098,8 @@ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) { } int nr_slots = (int)nargs + 2; JSFunction *f = JS_VALUE_GET_FUNCTION(fn); - if (f->kind == JS_FUNC_KIND_NATIVE && f->u.native.nr_slots > nr_slots) - nr_slots = f->u.native.nr_slots; + if (f->kind == JS_FUNC_KIND_NATIVE && JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots > nr_slots) + nr_slots = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots; JSFrameRegister *new_frame = alloc_frame_register(ctx, nr_slots); if (!new_frame) return JS_EXCEPTION; new_frame->function = fn; diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index 4688415d..73dcdad7 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -120,8 +120,8 @@ typedef struct JSBlob JSBlob; typedef struct JSText JSText; typedef struct JSRecord JSRecord; typedef struct JSFunction JSFunction; -typedef struct JSFrame JSFrame; typedef struct JSCode JSCode; +typedef struct JSFrame JSFrame; #define OBJHDR_CAP_SHIFT 8u #define OBJHDR_CAP_MASK (((objhdr_t)1ull << 56) - 1ull) @@ -278,7 +278,6 @@ typedef void (*JSLogCallback)(JSContext *ctx, const char *channel, const char *m /* Forward declaration for bytecode freeing */ #define JS_VALUE_GET_BLOB(v) ((JSBlob *)JS_VALUE_GET_PTR (v)) -#define JS_VALUE_GET_CODE(v) (JS_VALUE_GET_PTR (v)) #ifdef HEAP_CHECK void heap_check_fail(void *ptr, struct JSContext *ctx); @@ -286,6 +285,7 @@ void heap_check_fail(void *ptr, struct JSContext *ctx); #define JS_VALUE_GET_OBJ(v) ((JSRecord *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_TEXT(v) 
((JSText *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_FUNCTION(v) ((JSFunction *)heap_check_chase(ctx, v)) +#define JS_VALUE_GET_CODE(v) ((JSCode *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_FRAME(v) ((JSFrame *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_STRING(v) ((JSText *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_RECORD(v) ((JSRecord *)heap_check_chase(ctx, v)) @@ -294,6 +294,7 @@ void heap_check_fail(void *ptr, struct JSContext *ctx); #define JS_VALUE_GET_OBJ(v) ((JSRecord *)chase (v)) #define JS_VALUE_GET_TEXT(v) ((JSText *)chase (v)) #define JS_VALUE_GET_FUNCTION(v) ((JSFunction *)chase (v)) +#define JS_VALUE_GET_CODE(v) ((JSCode *)chase (v)) #define JS_VALUE_GET_FRAME(v) ((JSFrame *)chase (v)) #define JS_VALUE_GET_STRING(v) ((JSText *)chase (v)) #define JS_VALUE_GET_RECORD(v) ((JSRecord *)chase (v)) @@ -1341,6 +1342,27 @@ typedef enum { JS_FUNC_KIND_NATIVE, /* QBE-compiled native function */ } JSFunctionKind; +typedef enum { + JS_CODE_KIND_REGISTER = 1, + JS_CODE_KIND_NATIVE = 2, +} JSCodeKind; + +typedef struct JSCode { + objhdr_t header; /* OBJ_CODE */ + uint8_t kind; + int16_t arity; + union { + struct { + JSCodeRegister *code; + } reg; + struct { + void *fn_ptr; /* compiled cell_fn_N pointer */ + void *dl_handle; /* dylib handle for dlsym lookups */ + uint16_t nr_slots; /* frame size for this function */ + } native; + } u; +} JSCode; + typedef struct JSFunction { objhdr_t header; /* must come first */ JSValue name; /* function name as JSValue text */ @@ -1353,16 +1375,10 @@ typedef struct JSFunction { int16_t magic; } cfunc; struct { - JSCodeRegister *code; /* compiled register code (off-heap) */ + JSValue code; /* JSCode object (OBJ_CODE) */ JSValue env_record; /* stone record, module environment */ JSValue outer_frame; /* JSFrame JSValue, for closures */ - } reg; - struct { - void *fn_ptr; /* compiled cell_fn_N pointer */ - void *dl_handle; /* dylib handle for dlsym lookups */ - uint16_t nr_slots; /* frame size for this function */ - 
JSValue outer_frame; /* GC-traced, for closures */ - } native; + } cell; } u; } JSFunction; diff --git a/source/runtime.c b/source/runtime.c index 663d7900..8bc865a4 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -53,8 +53,8 @@ void heap_check_fail(void *ptr, JSContext *ctx) { JSFunction *fn = (JSFunction *)JS_VALUE_GET_PTR(frame->function); const char *name = NULL, *file = NULL; uint16_t line = 0; - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - JSCodeRegister *code = fn->u.reg.code; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + JSCodeRegister *code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; file = code->filename_cstr; name = code->name_cstr; if (!first) @@ -1394,14 +1394,14 @@ void gc_scan_object (JSContext *ctx, void *ptr, uint8_t *from_base, uint8_t *fro JSFunction *fn = (JSFunction *)ptr; /* Scan the function name */ fn->name = gc_copy_value (ctx, fn->name, from_base, from_end, to_base, to_free, to_end); - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { /* Scan code tree to arbitrary nesting depth */ - gc_scan_code_tree (ctx, fn->u.reg.code, from_base, from_end, to_base, to_free, to_end); + gc_scan_code_tree (ctx, JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code, from_base, from_end, to_base, to_free, to_end); /* Scan outer_frame and env_record */ - fn->u.reg.outer_frame = gc_copy_value (ctx, fn->u.reg.outer_frame, from_base, from_end, to_base, to_free, to_end); - fn->u.reg.env_record = gc_copy_value (ctx, fn->u.reg.env_record, from_base, from_end, to_base, to_free, to_end); + fn->u.cell.outer_frame = gc_copy_value (ctx, fn->u.cell.outer_frame, from_base, from_end, to_base, to_free, to_end); + fn->u.cell.env_record = gc_copy_value (ctx, fn->u.cell.env_record, from_base, from_end, to_base, to_free, to_end); } else if (fn->kind == JS_FUNC_KIND_NATIVE) { - fn->u.native.outer_frame = 
gc_copy_value (ctx, fn->u.native.outer_frame, from_base, from_end, to_base, to_free, to_end); + fn->u.cell.outer_frame = gc_copy_value (ctx, fn->u.cell.outer_frame, from_base, from_end, to_base, to_free, to_end); } break; } @@ -1434,10 +1434,10 @@ void gc_scan_object (JSContext *ctx, void *ptr, uint8_t *from_base, uint8_t *fro objhdr_t fh = *(objhdr_t *)JS_VALUE_GET_PTR (frame->function); if (objhdr_type (fh) == OBJ_FUNCTION) { JSFunction *fn = (JSFunction *)JS_VALUE_GET_PTR (frame->function); - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - if (fn->u.reg.code->name_cstr) fname = fn->u.reg.code->name_cstr; - if (fn->u.reg.code->filename_cstr) ffile = fn->u.reg.code->filename_cstr; - fnslots = fn->u.reg.code->nr_slots; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + if (JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->name_cstr) fname = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->name_cstr; + if (JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->filename_cstr) ffile = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->filename_cstr; + fnslots = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->nr_slots; } } } @@ -1543,8 +1543,8 @@ int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size) { } if (objhdr_type (fnh) == OBJ_FUNCTION) { JSFunction *fnp = (JSFunction *)JS_VALUE_GET_PTR (fn_v); - if (fnp->kind == JS_FUNC_KIND_REGISTER && fnp->u.reg.code && fnp->u.reg.code->name_cstr) - fn_name = fnp->u.reg.code->name_cstr; + if (fnp->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fnp->u.cell.code)->u.reg.code && JS_VALUE_GET_CODE(fnp->u.cell.code)->u.reg.code->name_cstr) + fn_name = JS_VALUE_GET_CODE(fnp->u.cell.code)->u.reg.code->name_cstr; } } fprintf (stderr, "VALIDATE_GC: pre-gc frame %p slot[%llu] -> %p (chased %p) bad type %d (hdr=0x%llx) fn=%s\n", @@ -4740,8 +4740,8 @@ JSValue JS_CallInternal (JSContext *ctx, JSValue func_obj, JSValue this_obj, case JS_FUNC_KIND_C_DATA: return js_call_c_function (ctx, 
func_obj, this_obj, argc, argv); case JS_FUNC_KIND_REGISTER: - return JS_CallRegisterVM (ctx, f->u.reg.code, this_obj, argc, argv, - f->u.reg.env_record, f->u.reg.outer_frame); + return JS_CallRegisterVM (ctx, JS_VALUE_GET_CODE(f->u.cell.code)->u.reg.code, this_obj, argc, argv, + f->u.cell.env_record, f->u.cell.outer_frame); case JS_FUNC_KIND_NATIVE: return cell_native_dispatch (ctx, func_obj, this_obj, argc, argv); default: @@ -4763,8 +4763,8 @@ JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, J case JS_FUNC_KIND_C: return js_call_c_function (ctx, func_obj, this_obj, argc, argv); case JS_FUNC_KIND_REGISTER: - return JS_CallRegisterVM (ctx, f->u.reg.code, this_obj, argc, argv, - f->u.reg.env_record, f->u.reg.outer_frame); + return JS_CallRegisterVM (ctx, JS_VALUE_GET_CODE(f->u.cell.code)->u.reg.code, this_obj, argc, argv, + f->u.cell.env_record, f->u.cell.outer_frame); case JS_FUNC_KIND_NATIVE: return cell_native_dispatch (ctx, func_obj, this_obj, argc, argv); default: @@ -12751,8 +12751,8 @@ JSValue JS_GetStack(JSContext *ctx) { if (!JS_IsFunction(frame->function)) break; JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - JSCodeRegister *code = fn->u.reg.code; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + JSCodeRegister *code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; uint32_t pc = is_first ? 
cur_pc : (uint32_t)(JS_VALUE_GET_INT(frame->address) >> 16); frames[count].fn = code->name_cstr; frames[count].file = code->filename_cstr; From 3f206d80dd64097cf66278a67cd1d18323ec934a Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Thu, 19 Feb 2026 00:47:34 -0600 Subject: [PATCH 9/9] jscode --- source/mach.c | 20 +- source/qbe_helpers.c | 493 ++++++++++++++++++++++---------------- source/quickjs-internal.h | 5 +- source/runtime.c | 3 +- 4 files changed, 299 insertions(+), 222 deletions(-) diff --git a/source/mach.c b/source/mach.c index ea08beda..fa74b598 100644 --- a/source/mach.c +++ b/source/mach.c @@ -536,13 +536,10 @@ JSValue js_new_register_function(JSContext *ctx, JSCodeRegister *code, JSValue e return out; } -/* Create a native (QBE-compiled) function */ -JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, - uint16_t nr_slots, int arity, JSValue outer_frame) { +JSValue js_new_native_function_with_code(JSContext *ctx, JSValue code_obj, int arity, JSValue outer_frame) { JSGCRef frame_ref; JSGCRef fn_ref; JSFunction *fn; - JSValue code_obj; JS_PushGCRef(ctx, &frame_ref); frame_ref.val = outer_frame; JS_AddGCRef(ctx, &fn_ref); @@ -560,12 +557,6 @@ JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, fn->kind = JS_FUNC_KIND_NATIVE; fn->length = arity; fn->name = JS_NULL; - code_obj = js_new_native_code(ctx, fn_ptr, dl_handle, nr_slots, arity); - if (JS_IsException(code_obj)) { - JS_DeleteGCRef(ctx, &fn_ref); - JS_PopGCRef(ctx, &frame_ref); - return JS_EXCEPTION; - } fn = JS_VALUE_GET_FUNCTION(fn_ref.val); fn->u.cell.code = code_obj; fn->u.cell.env_record = JS_NULL; @@ -577,6 +568,15 @@ JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, return out; } +/* Create a native (QBE-compiled) function */ +JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, + uint16_t nr_slots, int arity, JSValue outer_frame) { + JSValue code_obj = js_new_native_code(ctx, fn_ptr, 
dl_handle, nr_slots, arity); + if (JS_IsException(code_obj)) + return JS_EXCEPTION; + return js_new_native_function_with_code(ctx, code_obj, arity, outer_frame); +} + /* Binary operations helper */ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { /* Fast path for integers */ diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 9f96f2b9..ba90379a 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -12,12 +12,6 @@ #include #include -#if defined(__GNUC__) || defined(__clang__) -#define CELL_THREAD_LOCAL __thread -#else -#define CELL_THREAD_LOCAL _Thread_local -#endif - /* Non-inline wrappers for static inline functions in quickjs.h */ JSValue qbe_new_float64(JSContext *ctx, double d) { return __JS_NewFloat64(ctx, d); @@ -230,36 +224,72 @@ JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b) { /* --- Property access --- */ -/* Current module handle for active native dispatch. */ -static CELL_THREAD_LOCAL void *g_current_dl_handle = NULL; - typedef struct { void *dl_handle; JSContext *ctx; JSGCRef *vals; int count; } AOTLiteralPool; +typedef struct { + const char *name; + JSValue key; +} AOTKeyCacheEntry; +typedef struct { + void *dl_handle; + int64_t fn_idx; + JSValue code; +} AOTCodeCacheEntry; -static CELL_THREAD_LOCAL AOTLiteralPool g_aot_lit_pool = {0}; +typedef struct AOTGCRefChunk AOTGCRefChunk; -static void aot_clear_lit_pool(void) { - if (g_aot_lit_pool.vals) { - if (g_aot_lit_pool.ctx) { - for (int i = 0; i < g_aot_lit_pool.count; i++) - JS_DeleteGCRef(g_aot_lit_pool.ctx, &g_aot_lit_pool.vals[i]); - } - free(g_aot_lit_pool.vals); +typedef struct { + void *current_dl_handle; + AOTLiteralPool lit_pool; + AOTKeyCacheEntry *key_cache; + int key_cache_count; + int key_cache_cap; + AOTCodeCacheEntry *code_cache; + int code_cache_count; + int code_cache_cap; + JSGCRef native_env_ref; + int has_native_env; + int native_env_ref_inited; + AOTGCRefChunk **gc_ref_chunks; + int gc_ref_chunk_count; + int aot_depth; + 
JSValue pending_callee_frame; + int pending_is_tail; +} NativeRTState; + +static NativeRTState *native_state(JSContext *ctx) { + NativeRTState *st = (NativeRTState *)ctx->native_state; + if (st) return st; + st = js_mallocz_rt(sizeof(*st)); + if (!st) { + JS_RaiseOOM(ctx); + return NULL; } - g_aot_lit_pool.dl_handle = NULL; - g_aot_lit_pool.ctx = NULL; - g_aot_lit_pool.vals = NULL; - g_aot_lit_pool.count = 0; + ctx->native_state = st; + return st; } -static int aot_load_lit_pool(JSContext *ctx, void *dl_handle) { - aot_clear_lit_pool(); - g_aot_lit_pool.dl_handle = dl_handle; - g_aot_lit_pool.ctx = ctx; +static void aot_clear_lit_pool(JSContext *ctx, NativeRTState *st) { + if (!st) return; + if (st->lit_pool.vals) { + for (int i = 0; i < st->lit_pool.count; i++) + JS_DeleteGCRef(ctx, &st->lit_pool.vals[i]); + free(st->lit_pool.vals); + } + st->lit_pool.dl_handle = NULL; + st->lit_pool.ctx = NULL; + st->lit_pool.vals = NULL; + st->lit_pool.count = 0; +} + +static int aot_load_lit_pool(JSContext *ctx, NativeRTState *st, void *dl_handle) { + aot_clear_lit_pool(ctx, st); + st->lit_pool.dl_handle = dl_handle; + st->lit_pool.ctx = ctx; if (!dl_handle) return 1; @@ -269,20 +299,20 @@ static int aot_load_lit_pool(JSContext *ctx, void *dl_handle) { if (count <= 0 || !table_ptr) return 1; - g_aot_lit_pool.vals = (JSGCRef *)calloc((size_t)count, sizeof(JSGCRef)); - if (!g_aot_lit_pool.vals) { + st->lit_pool.vals = (JSGCRef *)calloc((size_t)count, sizeof(JSGCRef)); + if (!st->lit_pool.vals) { JS_RaiseOOM(ctx); return 0; } - g_aot_lit_pool.count = 0; + st->lit_pool.count = 0; for (int i = 0; i < count; i++) { const char *cstr = table_ptr[i] ? 
table_ptr[i] : ""; - JS_AddGCRef(ctx, &g_aot_lit_pool.vals[i]); - g_aot_lit_pool.count = i + 1; - g_aot_lit_pool.vals[i].val = js_key_new(ctx, cstr); - if (JS_IsException(g_aot_lit_pool.vals[i].val)) { - aot_clear_lit_pool(); + JS_AddGCRef(ctx, &st->lit_pool.vals[i]); + st->lit_pool.count = i + 1; + st->lit_pool.vals[i].val = js_key_new(ctx, cstr); + if (JS_IsException(st->lit_pool.vals[i].val)) { + aot_clear_lit_pool(ctx, st); return 0; } } @@ -290,70 +320,56 @@ static int aot_load_lit_pool(JSContext *ctx, void *dl_handle) { } static JSValue aot_lit_from_index(JSContext *ctx, int64_t lit_idx) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; if (lit_idx < 0) { JS_RaiseDisrupt(ctx, "literal index out of range"); return JS_EXCEPTION; } - if (g_aot_lit_pool.dl_handle != g_current_dl_handle || g_aot_lit_pool.ctx != ctx) { - if (!aot_load_lit_pool(ctx, g_current_dl_handle)) + if (st->lit_pool.dl_handle != st->current_dl_handle || st->lit_pool.ctx != ctx) { + if (!aot_load_lit_pool(ctx, st, st->current_dl_handle)) return JS_EXCEPTION; } - if (lit_idx >= g_aot_lit_pool.count) { + if (lit_idx >= st->lit_pool.count) { JS_RaiseDisrupt(ctx, "literal index out of range"); return JS_EXCEPTION; } - return g_aot_lit_pool.vals[lit_idx].val; + return st->lit_pool.vals[lit_idx].val; } -typedef struct { - const char *name; - JSValue key; -} AOTKeyCacheEntry; - -static CELL_THREAD_LOCAL JSContext *g_aot_key_cache_ctx = NULL; -static CELL_THREAD_LOCAL AOTKeyCacheEntry *g_aot_key_cache = NULL; -static CELL_THREAD_LOCAL int g_aot_key_cache_count = 0; -static CELL_THREAD_LOCAL int g_aot_key_cache_cap = 0; - /* Convert a static C string to an interned JSValue key. - Uses a small thread-local cache keyed by C-string pointer to avoid + Uses a small per-actor cache keyed by C-string pointer to avoid repeated UTF-8 decoding in hot property paths. 
*/ static JSValue aot_key_from_cstr(JSContext *ctx, const char *name) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; if (!name) return JS_NULL; - if (g_aot_key_cache_ctx != ctx) { - free(g_aot_key_cache); - g_aot_key_cache = NULL; - g_aot_key_cache_count = 0; - g_aot_key_cache_cap = 0; - g_aot_key_cache_ctx = ctx; - } - - for (int i = 0; i < g_aot_key_cache_count; i++) { - if (g_aot_key_cache[i].name == name) - return g_aot_key_cache[i].key; + for (int i = 0; i < st->key_cache_count; i++) { + if (st->key_cache[i].name == name) + return st->key_cache[i].key; } JSValue key = js_key_new(ctx, name); if (JS_IsNull(key)) return JS_RaiseDisrupt(ctx, "invalid property key"); - if (g_aot_key_cache_count >= g_aot_key_cache_cap) { - int new_cap = g_aot_key_cache_cap ? (g_aot_key_cache_cap * 2) : 64; + if (st->key_cache_count >= st->key_cache_cap) { + int new_cap = st->key_cache_cap ? (st->key_cache_cap * 2) : 64; AOTKeyCacheEntry *new_cache = - (AOTKeyCacheEntry *)realloc(g_aot_key_cache, (size_t)new_cap * sizeof(*new_cache)); + (AOTKeyCacheEntry *)realloc(st->key_cache, (size_t)new_cap * sizeof(*new_cache)); if (!new_cache) return JS_RaiseOOM(ctx); - g_aot_key_cache = new_cache; - g_aot_key_cache_cap = new_cap; + st->key_cache = new_cache; + st->key_cache_cap = new_cap; } - g_aot_key_cache[g_aot_key_cache_count].name = name; - g_aot_key_cache[g_aot_key_cache_count].key = key; - g_aot_key_cache_count++; + st->key_cache[st->key_cache_count].name = name; + st->key_cache[st->key_cache_count].key = key; + st->key_cache_count++; return key; } @@ -460,42 +476,44 @@ int cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, /* --- Intrinsic/global lookup --- */ -/* Native module environment — set before executing a native module's cell_main. - Contains runtime functions (starts_with, ends_with, etc.) and use(). 
*/ -static CELL_THREAD_LOCAL JSGCRef g_native_env_ref; -static CELL_THREAD_LOCAL int g_has_native_env = 0; - void cell_rt_set_native_env(JSContext *ctx, JSValue env) { + NativeRTState *st = native_state(ctx); + if (!st) return; if (!JS_IsNull(env) && !JS_IsStone(env)) { fprintf(stderr, "cell_rt_set_native_env: ERROR env not stone\n"); abort(); } /* Drop module literal pool roots before switching native env/module. */ - aot_clear_lit_pool(); + aot_clear_lit_pool(ctx, st); - /* Native module boundary: clear per-thread key cache so stale keys + /* Native module boundary: clear per-actor key cache so stale keys cannot survive across context/module lifetimes. */ - free(g_aot_key_cache); - g_aot_key_cache = NULL; - g_aot_key_cache_count = 0; - g_aot_key_cache_cap = 0; - g_aot_key_cache_ctx = ctx; + free(st->key_cache); + st->key_cache = NULL; + st->key_cache_count = 0; + st->key_cache_cap = 0; - if (g_has_native_env) - JS_DeleteGCRef(ctx, &g_native_env_ref); + if (st->has_native_env && st->native_env_ref_inited) { + JS_DeleteGCRef(ctx, &st->native_env_ref); + st->native_env_ref_inited = 0; + } if (!JS_IsNull(env)) { - JS_AddGCRef(ctx, &g_native_env_ref); - g_native_env_ref.val = env; - g_has_native_env = 1; + JS_AddGCRef(ctx, &st->native_env_ref); + st->native_env_ref_inited = 1; + st->native_env_ref.val = env; + st->has_native_env = 1; } else { - g_has_native_env = 0; + st->has_native_env = 0; + st->native_env_ref.val = JS_NULL; } } static JSValue cell_rt_get_intrinsic_key(JSContext *ctx, JSValue key) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; /* Check native env first (runtime-provided functions like log) */ - if (g_has_native_env) { - JSValue v = JS_GetProperty(ctx, g_native_env_ref.val, key); + if (st->has_native_env) { + JSValue v = JS_GetProperty(ctx, st->native_env_ref.val, key); if (!JS_IsNull(v)) return v; } @@ -586,75 +604,51 @@ typedef struct AOTGCRefChunk { uint8_t inited[AOT_GC_REF_CHUNK_SIZE]; } AOTGCRefChunk; -static 
CELL_THREAD_LOCAL AOTGCRefChunk **g_aot_gc_ref_chunks = NULL; -static CELL_THREAD_LOCAL int g_aot_gc_ref_chunk_count = 0; -static CELL_THREAD_LOCAL int g_aot_depth = 0; -static CELL_THREAD_LOCAL JSContext *g_aot_gc_ref_ctx = NULL; - -int cell_rt_native_active(void) { - return g_aot_depth > 0; +int cell_rt_native_active(JSContext *ctx) { + NativeRTState *st = (NativeRTState *)ctx->native_state; + return st ? (st->aot_depth > 0) : 0; } -static int ensure_aot_gc_ref_slot(JSContext *ctx, int depth_index) { +static int ensure_aot_gc_ref_slot(JSContext *ctx, NativeRTState *st, int depth_index) { if (depth_index < 0) return 0; int needed_chunks = (depth_index / AOT_GC_REF_CHUNK_SIZE) + 1; - if (needed_chunks <= g_aot_gc_ref_chunk_count) + if (needed_chunks <= st->gc_ref_chunk_count) return 1; AOTGCRefChunk **new_chunks = - (AOTGCRefChunk **)realloc(g_aot_gc_ref_chunks, + (AOTGCRefChunk **)realloc(st->gc_ref_chunks, (size_t)needed_chunks * sizeof(*new_chunks)); if (!new_chunks) { JS_RaiseOOM(ctx); return 0; } - g_aot_gc_ref_chunks = new_chunks; - for (int i = g_aot_gc_ref_chunk_count; i < needed_chunks; i++) { - g_aot_gc_ref_chunks[i] = (AOTGCRefChunk *)calloc(1, sizeof(AOTGCRefChunk)); - if (!g_aot_gc_ref_chunks[i]) { + st->gc_ref_chunks = new_chunks; + for (int i = st->gc_ref_chunk_count; i < needed_chunks; i++) { + st->gc_ref_chunks[i] = (AOTGCRefChunk *)calloc(1, sizeof(AOTGCRefChunk)); + if (!st->gc_ref_chunks[i]) { JS_RaiseOOM(ctx); return 0; } } - g_aot_gc_ref_chunk_count = needed_chunks; + st->gc_ref_chunk_count = needed_chunks; return 1; } -static inline JSGCRef *aot_gc_ref_at(int depth_index) { +static inline JSGCRef *aot_gc_ref_at(NativeRTState *st, int depth_index) { int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; - return &g_aot_gc_ref_chunks[chunk_index]->refs[slot_index]; + return &st->gc_ref_chunks[chunk_index]->refs[slot_index]; } -static inline uint8_t *aot_gc_ref_inited_at(int depth_index) { 
+static inline uint8_t *aot_gc_ref_inited_at(NativeRTState *st, int depth_index) { int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; - return &g_aot_gc_ref_chunks[chunk_index]->inited[slot_index]; + return &st->gc_ref_chunks[chunk_index]->inited[slot_index]; } -/* GC refs are owned by a specific JSContext. If context changes on this thread, - unregister previous refs and reset per-slot initialization state. */ -static void aot_gc_ref_reset_ctx(JSContext *ctx) { - if (g_aot_gc_ref_ctx == ctx) - return; - if (g_aot_gc_ref_ctx) { - for (int ci = 0; ci < g_aot_gc_ref_chunk_count; ci++) { - AOTGCRefChunk *chunk = g_aot_gc_ref_chunks[ci]; - for (int si = 0; si < AOT_GC_REF_CHUNK_SIZE; si++) { - if (chunk->inited[si]) { - JS_DeleteGCRef(g_aot_gc_ref_ctx, &chunk->refs[si]); - chunk->inited[si] = 0; - chunk->refs[si].val = JS_NULL; - } - } - } - } - g_aot_gc_ref_ctx = ctx; -} - -static inline void aot_gc_ref_activate(JSContext *ctx, int depth_index) { - JSGCRef *ref = aot_gc_ref_at(depth_index); - uint8_t *inited = aot_gc_ref_inited_at(depth_index); +static inline void aot_gc_ref_activate(JSContext *ctx, NativeRTState *st, int depth_index) { + JSGCRef *ref = aot_gc_ref_at(st, depth_index); + uint8_t *inited = aot_gc_ref_inited_at(st, depth_index); if (!*inited) { JS_AddGCRef(ctx, ref); *inited = 1; @@ -662,42 +656,45 @@ static inline void aot_gc_ref_activate(JSContext *ctx, int depth_index) { } JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { - aot_gc_ref_reset_ctx(ctx); - if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) { + NativeRTState *st = native_state(ctx); + if (!st) return NULL; + if (!ensure_aot_gc_ref_slot(ctx, st, st->aot_depth)) { return NULL; } JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots); if (!frame) return NULL; - aot_gc_ref_activate(ctx, g_aot_depth); - JSGCRef *ref = aot_gc_ref_at(g_aot_depth); + aot_gc_ref_activate(ctx, st, st->aot_depth); + JSGCRef *ref = 
aot_gc_ref_at(st, st->aot_depth); ref->val = JS_MKPTR(frame); - g_aot_depth++; + st->aot_depth++; return (JSValue *)frame->slots; } /* Push an already-allocated frame onto the active AOT frame stack. */ static int cell_rt_push_existing_frame(JSContext *ctx, JSValue frame_val) { - aot_gc_ref_reset_ctx(ctx); - if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) + NativeRTState *st = native_state(ctx); + if (!st) return 0; + if (!ensure_aot_gc_ref_slot(ctx, st, st->aot_depth)) return 0; - aot_gc_ref_activate(ctx, g_aot_depth); - JSGCRef *ref = aot_gc_ref_at(g_aot_depth); + aot_gc_ref_activate(ctx, st, st->aot_depth); + JSGCRef *ref = aot_gc_ref_at(st, st->aot_depth); ref->val = frame_val; - g_aot_depth++; + st->aot_depth++; return 1; } JSValue *cell_rt_refresh_fp(JSContext *ctx) { - (void)ctx; - if (g_aot_depth <= 0) { - fprintf(stderr, "[BUG] cell_rt_refresh_fp: g_aot_depth=%d\n", g_aot_depth); + NativeRTState *st = native_state(ctx); + if (!st) return NULL; + if (st->aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_refresh_fp: aot_depth=%d\n", st->aot_depth); abort(); } - JSValue val = aot_gc_ref_at(g_aot_depth - 1)->val; + JSValue val = aot_gc_ref_at(st, st->aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp: frame is NULL at depth=%d val=%lld\n", - g_aot_depth, (long long)val); + st->aot_depth, (long long)val); abort(); } return (JSValue *)frame->slots; @@ -705,13 +702,15 @@ JSValue *cell_rt_refresh_fp(JSContext *ctx) { /* Combined refresh + exception check in a single call. 
*/ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { + NativeRTState *st = native_state(ctx); + if (!st) return NULL; if (JS_HasException(ctx)) return NULL; - if (g_aot_depth <= 0) { - fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: g_aot_depth=%d\n", g_aot_depth); + if (st->aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: aot_depth=%d\n", st->aot_depth); abort(); } - JSValue val = aot_gc_ref_at(g_aot_depth - 1)->val; + JSValue val = aot_gc_ref_at(st, st->aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: frame is NULL\n"); @@ -721,34 +720,26 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { } void cell_rt_leave_frame(JSContext *ctx) { - (void)ctx; - if (g_aot_depth <= 0) { + NativeRTState *st = native_state(ctx); + if (!st) return; + if (st->aot_depth <= 0) { fprintf(stderr, "[BUG] cell_rt_leave_frame underflow\n"); abort(); } - g_aot_depth--; - aot_gc_ref_at(g_aot_depth)->val = JS_NULL; + st->aot_depth--; + aot_gc_ref_at(st, st->aot_depth)->val = JS_NULL; } /* --- Function creation and calling --- */ typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); -/* Set before executing a native module's cell_main — - used by cell_rt_make_function to resolve fn_ptr via dlsym */ -/* g_current_dl_handle is defined near property/literal helpers. */ - /* ============================================================ Dispatch loop — the core of native function execution. Each compiled cell_fn_N returns to this loop when it needs to call another function (instead of recursing via C stack). ============================================================ */ -/* Pending call state — set by cell_rt_signal_call / cell_rt_signal_tail_call, - read by the dispatch loop. 
*/ -static CELL_THREAD_LOCAL JSValue g_pending_callee_frame = 0; /* JSFrameRegister ptr */ -static CELL_THREAD_LOCAL int g_pending_is_tail = 0; - /* Poll pause state on taken backward jumps (AOT backedges). MACH can suspend/resume a register VM frame at pc granularity; native AOT does not currently have an equivalent resume point, so we acknowledge timer @@ -762,34 +753,38 @@ int cell_rt_check_backedge(JSContext *ctx) { } void cell_rt_signal_call(JSContext *ctx, void *fp, int64_t frame_slot) { - (void)ctx; + NativeRTState *st = native_state(ctx); + if (!st) return; JSValue *slots = (JSValue *)fp; - g_pending_callee_frame = slots[frame_slot]; - g_pending_is_tail = 0; + st->pending_callee_frame = slots[frame_slot]; + st->pending_is_tail = 0; } void cell_rt_signal_tail_call(JSContext *ctx, void *fp, int64_t frame_slot) { - (void)ctx; + NativeRTState *st = native_state(ctx); + if (!st) return; JSValue *slots = (JSValue *)fp; - g_pending_callee_frame = slots[frame_slot]; - g_pending_is_tail = 1; + st->pending_callee_frame = slots[frame_slot]; + st->pending_is_tail = 1; } /* Entry point called from JS_CallInternal / JS_Call / MACH_INVOKE for JS_FUNC_KIND_NATIVE functions. 
*/ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; JSFunction *f = JS_VALUE_GET_FUNCTION(func_obj); cell_compiled_fn fn = (cell_compiled_fn)JS_VALUE_GET_CODE(f->u.cell.code)->u.native.fn_ptr; int nr_slots = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots; int arity = f->length; - void *prev_dl_handle = g_current_dl_handle; - g_current_dl_handle = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.dl_handle; + void *prev_dl_handle = st->current_dl_handle; + st->current_dl_handle = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.dl_handle; #define RETURN_DISPATCH(v) \ do { \ atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); \ - g_current_dl_handle = prev_dl_handle; \ + st->current_dl_handle = prev_dl_handle; \ return (v); \ } while (0) @@ -820,11 +815,11 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSFrameRegister *frame = (JSFrameRegister *)((char *)fp - offsetof(JSFrameRegister, slots)); frame->function = func_obj; - int base_depth = g_aot_depth; /* remember entry depth for return detection */ + int base_depth = st->aot_depth; /* remember entry depth for return detection */ for (;;) { - g_pending_callee_frame = 0; - g_pending_is_tail = 0; + st->pending_callee_frame = 0; + st->pending_is_tail = 0; if (atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed) >= 1) atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); @@ -832,26 +827,26 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (JS_IsFunction(frame->function)) { JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); if (cur_fn->kind == JS_FUNC_KIND_NATIVE) - g_current_dl_handle = JS_VALUE_GET_CODE(cur_fn->u.cell.code)->u.native.dl_handle; + st->current_dl_handle = JS_VALUE_GET_CODE(cur_fn->u.cell.code)->u.native.dl_handle; } JSValue result = fn(ctx, fp); /* Re-derive frame after potential GC */ - if 
(g_aot_depth <= 0) { + if (st->aot_depth <= 0) { fprintf(stderr, "[BUG] native dispatch lost frame depth after fn call\n"); abort(); } - JSValue frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + JSValue frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; - if (g_pending_callee_frame != 0) { + if (st->pending_callee_frame != 0) { /* Function signaled a call — dispatch it */ - JSValue callee_frame_val = g_pending_callee_frame; - g_pending_callee_frame = 0; - int pending_is_tail = g_pending_is_tail; - g_pending_is_tail = 0; + JSValue callee_frame_val = st->pending_callee_frame; + st->pending_callee_frame = 0; + int pending_is_tail = st->pending_is_tail; + st->pending_is_tail = 0; JSGCRef callee_ref; JS_PushGCRef(ctx, &callee_ref); callee_ref.val = callee_frame_val; @@ -897,7 +892,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JS_PopGCRef(ctx, &callee_ref); RETURN_DISPATCH(JS_EXCEPTION); } - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(g_aot_depth - 1)->val); + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(st, st->aot_depth - 1)->val); fp = (JSValue *)frame->slots; fn = callee_ptr; } else { @@ -917,7 +912,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (!cell_rt_push_existing_frame(ctx, callee_ref.val)) { /* Resume caller with exception pending */ - frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); @@ -926,7 +921,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JS_PopGCRef(ctx, &callee_ref); continue; } - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(g_aot_depth - 1)->val); + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(st, st->aot_depth - 1)->val); fp = (JSValue 
*)frame->slots; fn = callee_ptr; } @@ -942,7 +937,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, callee_argc, &callee_fr->slots[1], 0); /* Re-derive frame after call */ - frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; @@ -970,18 +965,18 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (pending_is_tail) { /* Tail call to non-native: return its result up the chain */ /* Pop current frame and return to caller */ - if (g_aot_depth <= base_depth) { + if (st->aot_depth <= base_depth) { cell_rt_leave_frame(ctx); JS_PopGCRef(ctx, &callee_ref); RETURN_DISPATCH(ret); } /* Pop current frame, return to caller frame */ cell_rt_leave_frame(ctx); - if (g_aot_depth < base_depth) { + if (st->aot_depth < base_depth) { JS_PopGCRef(ctx, &callee_ref); RETURN_DISPATCH(ret); } - frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); @@ -1014,17 +1009,17 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (!JS_HasException(ctx)) JS_Disrupt(ctx); - if (g_aot_depth <= base_depth) { + if (st->aot_depth <= base_depth) { cell_rt_leave_frame(ctx); RETURN_DISPATCH(JS_EXCEPTION); } cell_rt_leave_frame(ctx); - if (g_aot_depth < base_depth) { + if (st->aot_depth < base_depth) { RETURN_DISPATCH(JS_EXCEPTION); } /* Resume caller and tag the return slot with JS_EXCEPTION. 
*/ - frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); @@ -1038,17 +1033,17 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, } /* Normal return — pop frame and store result in caller */ - if (g_aot_depth <= base_depth) { + if (st->aot_depth <= base_depth) { cell_rt_leave_frame(ctx); RETURN_DISPATCH(result); } cell_rt_leave_frame(ctx); - if (g_aot_depth < base_depth) { + if (st->aot_depth < base_depth) { RETURN_DISPATCH(result); } /* Return to caller frame */ - frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); @@ -1064,27 +1059,71 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, #undef RETURN_DISPATCH } +static JSValue aot_get_or_create_native_code(JSContext *ctx, NativeRTState *st, + void *dl_handle, int64_t fn_idx, + int arity, uint16_t nr_slots) { + for (int i = 0; i < st->code_cache_count; i++) { + AOTCodeCacheEntry *e = &st->code_cache[i]; + if (e->dl_handle == dl_handle && e->fn_idx == fn_idx) + return e->code; + } + + char name[64]; + snprintf(name, sizeof(name), "cell_fn_%lld", (long long)fn_idx); + void *fn_ptr = dlsym(dl_handle, name); + if (!fn_ptr) + return JS_RaiseDisrupt(ctx, "native function %s not found in dylib", name); + + JSCode *code = ct_alloc(ctx, sizeof(JSCode), 8); + if (!code) + return JS_EXCEPTION; + memset(code, 0, sizeof(*code)); + code->header = objhdr_make(0, OBJ_CODE, 0, 0, 0, 0); + code->kind = JS_CODE_KIND_NATIVE; + code->arity = (int16_t)arity; + code->u.native.fn_ptr = fn_ptr; + code->u.native.dl_handle = dl_handle; + code->u.native.nr_slots = nr_slots; + JSValue code_obj = JS_MKPTR(code); + + if (st->code_cache_count >= 
st->code_cache_cap) { + int new_cap = st->code_cache_cap ? (st->code_cache_cap * 2) : 128; + AOTCodeCacheEntry *new_cache = + (AOTCodeCacheEntry *)realloc(st->code_cache, (size_t)new_cap * sizeof(*new_cache)); + if (!new_cache) + return JS_RaiseOOM(ctx); + st->code_cache = new_cache; + st->code_cache_cap = new_cap; + } + + st->code_cache[st->code_cache_count].dl_handle = dl_handle; + st->code_cache[st->code_cache_count].fn_idx = fn_idx; + st->code_cache[st->code_cache_count].code = code_obj; + st->code_cache_count++; + return code_obj; +} + /* Create a native function object from a compiled fn_idx. Called from QBE-generated code during function creation. */ JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp, int64_t nr_args, int64_t nr_slots) { - if (!g_current_dl_handle) + (void)outer_fp; + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + if (!st->current_dl_handle) return JS_RaiseDisrupt(ctx, "no native module loaded"); - /* Resolve fn_ptr via dlsym at creation time — cached in the function object */ - char name[64]; - snprintf(name, sizeof(name), "cell_fn_%lld", (long long)fn_idx); - void *fn_ptr = dlsym(g_current_dl_handle, name); - if (!fn_ptr) - return JS_RaiseDisrupt(ctx, "native function %s not found in dylib", name); + JSValue code_obj = aot_get_or_create_native_code( + ctx, st, st->current_dl_handle, fn_idx, (int)nr_args, (uint16_t)nr_slots); + if (JS_IsException(code_obj)) + return JS_EXCEPTION; /* Get the current frame as outer_frame for closures */ JSValue outer_frame = JS_NULL; - if (g_aot_depth > 0) - outer_frame = aot_gc_ref_at(g_aot_depth - 1)->val; + if (st->aot_depth > 0) + outer_frame = aot_gc_ref_at(st, st->aot_depth - 1)->val; - return js_new_native_function(ctx, fn_ptr, g_current_dl_handle, - (uint16_t)nr_slots, (int)nr_args, outer_frame); + return js_new_native_function_with_code(ctx, code_obj, (int)nr_args, outer_frame); } /* --- Frame-based function calling --- @@ -1338,22 +1377,24 @@ 
JSValue cell_rt_regexp(JSContext *ctx, const char *pattern, const char *flags) { /* --- Module entry point --- Loads a native .cm module from a dylib handle. - Looks up cell_main, builds a heap-allocated frame, sets - g_current_dl_handle so closures register in the right module. */ + Looks up cell_main, builds a heap-allocated frame, and + records active module handle in per-actor native state. */ /* Helper: run a native module's entry point through the dispatch loop. Creates a temporary JS_FUNC_KIND_NATIVE function so that the full dispatch loop (tail calls, closures, etc.) works for module-level code. */ static JSValue native_module_run(JSContext *ctx, void *dl_handle, cell_compiled_fn entry, int nr_slots) { - void *prev_handle = g_current_dl_handle; - g_current_dl_handle = dl_handle; + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + void *prev_handle = st->current_dl_handle; + st->current_dl_handle = dl_handle; /* Create a native function object for the entry point */ JSValue func_obj = js_new_native_function(ctx, (void *)entry, dl_handle, (uint16_t)nr_slots, 0, JS_NULL); if (JS_IsException(func_obj)) { - g_current_dl_handle = prev_handle; + st->current_dl_handle = prev_handle; return JS_EXCEPTION; } @@ -1362,7 +1403,7 @@ static JSValue native_module_run(JSContext *ctx, void *dl_handle, JS_GetException(ctx); JSValue result = cell_native_dispatch(ctx, func_obj, JS_NULL, 0, NULL); - g_current_dl_handle = prev_handle; + st->current_dl_handle = prev_handle; return result; } @@ -1409,6 +1450,38 @@ JSValue cell_rt_native_module_load_named(JSContext *ctx, void *dl_handle, const return native_module_run(ctx, dl_handle, fn, nr_slots); } +void cell_rt_free_native_state(JSContext *ctx) { + NativeRTState *st = (NativeRTState *)ctx->native_state; + if (!st) return; + + aot_clear_lit_pool(ctx, st); + + if (st->has_native_env && st->native_env_ref_inited) { + JS_DeleteGCRef(ctx, &st->native_env_ref); + st->native_env_ref_inited = 0; + 
st->native_env_ref.val = JS_NULL; + } + + for (int ci = 0; ci < st->gc_ref_chunk_count; ci++) { + AOTGCRefChunk *chunk = st->gc_ref_chunks[ci]; + if (!chunk) continue; + for (int si = 0; si < AOT_GC_REF_CHUNK_SIZE; si++) { + if (chunk->inited[si]) { + JS_DeleteGCRef(ctx, &chunk->refs[si]); + chunk->inited[si] = 0; + chunk->refs[si].val = JS_NULL; + } + } + free(chunk); + } + + free(st->gc_ref_chunks); + free(st->key_cache); + free(st->code_cache); + js_free_rt(st); + ctx->native_state = NULL; +} + /* Backward-compat: uses RTLD_DEFAULT (works when dylib opened with RTLD_GLOBAL) */ JSValue cell_rt_module_entry(JSContext *ctx) { void *handle = dlopen(NULL, RTLD_LAZY); diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index 73dcdad7..db5546d6 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -1121,6 +1121,7 @@ struct JSContext { JSGCRef *last_gc_ref; /* used to reference temporary GC roots (list) */ JSLocalRef *top_local_ref; /* for JS_LOCAL macro - GC updates C locals through pointers */ CCallRoot *c_call_root; /* stack of auto-rooted C call argv arrays */ + void *native_state; /* qbe_helpers.c per-actor native runtime state */ int class_count; /* size of class_array and class_proto */ JSClass *class_array; @@ -1562,7 +1563,7 @@ static inline void set_value (JSContext *ctx, JSValue *pval, JSValue new_val) { *pval = new_val; } -int cell_rt_native_active(void); +int cell_rt_native_active(JSContext *ctx); static inline __exception int js_poll_interrupts (JSContext *ctx) { if (unlikely (atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2)) { @@ -1659,7 +1660,9 @@ JSValue js_key_from_string (JSContext *ctx, JSValue val); /* mach.c exports */ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame); JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, uint16_t nr_slots, int arity, JSValue outer_frame); +JSValue 
js_new_native_function_with_code(JSContext *ctx, JSValue code_obj, int arity, JSValue outer_frame); JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count); +void cell_rt_free_native_state(JSContext *ctx); #endif /* QUICKJS_INTERNAL_H */ diff --git a/source/runtime.c b/source/runtime.c index 8bc865a4..9bc46d5d 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -2102,6 +2102,7 @@ void JS_FreeContext (JSContext *ctx) { JSRuntime *rt = ctx->rt; int i; + cell_rt_free_native_state(ctx); JS_DeleteGCRef(ctx, &ctx->suspended_frame_ref); for (i = 0; i < ctx->class_count; i++) { @@ -5332,7 +5333,7 @@ JSValue js_regexp_toString (JSContext *ctx, JSValue this_val, int argc, JSValue int lre_check_timeout (void *opaque) { JSContext *ctx = opaque; - if (cell_rt_native_active ()) { + if (cell_rt_native_active (ctx)) { atomic_store_explicit (&ctx->pause_flag, 0, memory_order_relaxed); return 0; }