diff --git a/build.cm b/build.cm index 5d2a40fb..5d14d501 100644 --- a/build.cm +++ b/build.cm @@ -80,6 +80,17 @@ function content_hash(str) { return text(crypto.blake2(bb, 32), 'h') } +// Bump when native codegen/runtime ABI changes so stale dylibs are not reused. +def NATIVE_CACHE_VERSION = "native-v8" + +// Enable AOT ASan by creating .cell/asan_aot in the package root. +function native_sanitize_flags() { + if (fd.is_file('.cell/asan_aot')) { + return ' -fsanitize=address -fno-omit-frame-pointer' + } + return '' +} + function get_build_dir() { return shop.get_build_dir() } @@ -509,7 +520,8 @@ Build.build_static = function(packages, target, output, buildtype) { // il_parts: {data: text, functions: [text, ...]} // cc: C compiler path // tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_) -function compile_native_single(il_parts, cc, tmp_prefix) { +function compile_native_single(il_parts, cc, tmp_prefix, extra_flags) { + var _extra = extra_flags || '' var helpers_il = (il_parts.helpers && length(il_parts.helpers) > 0) ? text(il_parts.helpers, "\n") : "" var all_fns = text(il_parts.functions, "\n") @@ -519,7 +531,7 @@ function compile_native_single(il_parts, cc, tmp_prefix) { var o_path = tmp_prefix + '.o' var rc = null fd.slurpwrite(s_path, stone(blob(asm_text))) - rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path) + rc = os.system(cc + _extra + ' -c ' + s_path + ' -o ' + o_path) if (rc != 0) { print('Assembly failed'); disrupt } @@ -572,6 +584,8 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var tc = toolchains[_target] var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so') var cc = tc.c + var san_flags = native_sanitize_flags() + var san_suffix = length(san_flags) > 0 ? '_asan' : '' // Step 1: Compile through pipeline var optimized = shop.compile_file(src_path) @@ -586,7 +600,7 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) // Content hash for cache key - var hash = content_hash(text(fd.slurp(src_path)) + '\n' + _target + '\nnative') + var hash = content_hash(text(fd.slurp(src_path)) + '\n' + _target + '\nnative\n' + NATIVE_CACHE_VERSION + '\n' + san_flags) var build_dir = get_build_dir() ensure_dir(build_dir) @@ -596,22 +610,22 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { // Compile and assemble via batched parallel pipeline var tmp = '/tmp/cell_native_' + hash - var rt_o_path = '/tmp/cell_qbe_rt.o' + var rt_o_path = '/tmp/cell_qbe_rt' + san_suffix + '.o' - var o_paths = compile_native_single(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp, san_flags) // Compile QBE runtime stubs if needed var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' - rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') + rc = os.system(cc + san_flags + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') if (rc != 0) { print('QBE runtime stubs compilation failed'); disrupt } } // Link dylib - var link_cmd = cc + ' -shared -fPIC' + var link_cmd = cc + san_flags + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { @@ -658,6 +672,8 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var tc = toolchains[_target] var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so') var cc = tc.c + var san_flags = native_sanitize_flags() + var san_suffix = length(san_flags) > 0 ? '_asan' : '' var qbe_macros = use('qbe') var qbe_emit = use('qbe_emit') @@ -669,7 +685,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) var src = text(fd.slurp(src_path)) - var hash = content_hash(src + '\n' + _target + '\nnative') + var hash = content_hash(src + '\n' + _target + '\nnative\n' + NATIVE_CACHE_VERSION + '\n' + san_flags) var build_dir = get_build_dir() ensure_dir(build_dir) @@ -679,22 +695,22 @@ Build.compile_native_ir = function(optimized, src_path, opts) { // Compile and assemble via batched parallel pipeline var tmp = '/tmp/cell_native_' + hash - var rt_o_path = '/tmp/cell_qbe_rt.o' + var rt_o_path = '/tmp/cell_qbe_rt' + san_suffix + '.o' - var o_paths = compile_native_single(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp, san_flags) // Compile QBE runtime stubs if needed var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' - rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') + rc = os.system(cc + san_flags + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') if (rc != 0) { print('QBE runtime stubs compilation failed'); disrupt } } // Link dylib - var link_cmd = cc + ' -shared -fPIC' + var link_cmd = cc + san_flags + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { diff --git a/internal/os.c b/internal/os.c index 4fa6d549..22b9acc6 100644 --- a/internal/os.c +++ b/internal/os.c @@ -306,6 +306,7 @@ static JSValue js_os_rusage(JSContext *js, JSValue self, int argc, JSValue *argv JSC_SCALL(os_system, int err = system(str); + JS_SetPauseFlag(js, 0); ret = number2js(js,err); ) diff --git a/qbe_emit.cm b/qbe_emit.cm index 23924b2c..e62b419f 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -360,10 +360,93 @@ ${sw("w", "%fp", "%dest", "%r")} // Category C: Allocating helpers (return fp or 0) // ============================================================ - // Allocating binary ops: read 2 slots, call C, refresh, write dest + // add: int fast path in-helper, slow path calls runtime + h[] = `export function l $__add_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_is_int =w ceql %a_tag, 0 + %b_is_int =w ceql %b_tag, 0 + %both_int =w and %a_is_int, %b_is_int + jnz %both_int, @int_fast, @slow +@int_fast + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %sum =l add %ai, %bi + %sumw =w copy %sum + %sumext =l extsw %sumw + %sum_ok =w ceql %sumext, %sum + jnz %sum_ok, @int_store, @slow +@int_store + %rtag =l shl %sum, 1 +${sw("w", "%fp", "%dest", "%rtag")} + ret %fp +@slow + %r =l call $cell_rt_add(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // sub: int fast path in-helper, slow path calls float helper + h[] = `export function l $__sub_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_is_int =w ceql %a_tag, 0 + %b_is_int =w ceql %b_tag, 0 + %both_int =w and %a_is_int, %b_is_int + jnz %both_int, @int_fast, @slow +@int_fast + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %diff =l sub %ai, %bi + %diffw =w copy %diff + %diffext =l extsw %diffw + %diff_ok =w ceql %diffext, %diff + jnz %diff_ok, @int_store, @slow +@int_store + %rtag =l shl %diff, 1 +${sw("w", "%fp", "%dest", "%rtag")} + ret %fp +@slow + %r =l call $qbe_float_sub(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // mul: int fast path in-helper, slow path calls float helper + h[] = `export function l $__mul_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_is_int =w ceql %a_tag, 0 + %b_is_int =w ceql %b_tag, 0 + %both_int =w and %a_is_int, %b_is_int + jnz %both_int, @int_fast, @slow +@int_fast + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %prod =l mul %ai, %bi + %prodw =w copy %prod + %prodext =l extsw %prodw + %prod_ok =w ceql %prodext, %prod + jnz %prod_ok, @int_store, @slow +@int_store + %rtag =l shl %prod, 1 +${sw("w", "%fp", "%dest", "%rtag")} + ret %fp +@slow + %r =l call $qbe_float_mul(l %ctx, l %a, l %b) +${alloc_tail("%r")} +}` + + // Remaining allocating binary ops: call C, refresh, write dest var ab_ops = [ - ["add", "cell_rt_add"], ["sub", "qbe_float_sub"], - ["mul", "qbe_float_mul"], ["div", "qbe_float_div"], + ["div", "qbe_float_div"], ["mod", "qbe_float_mod"], ["pow", "qbe_float_pow"], ["concat", "JS_ConcatString"] ] @@ -685,11 +768,27 @@ var qbe_emit = function(ir, qbe, export_name) { var si = 0 var scan = null var scan_op = null + var label_pos = {} + var instr_idx = 0 var has_invokes = false var seg_counter = 0 var ri = 0 var seg_num = 0 var resume_val = 0 + var j_lbl = null + var j_idx = null + var jt_lbl = null + var jt_idx = null + var jt_backedge = false + var jf_lbl = null + var jf_idx = null + var jf_backedge = false + var jn_lbl = null + var jn_idx = null + var jn_backedge = false + var jnn_lbl = null + var jnn_idx = null + var jnn_backedge = false // Pre-scan: count invoke/tail_invoke points to assign segment numbers. // Must skip dead code (instructions after terminators) the same way @@ -701,6 +800,7 @@ var qbe_emit = function(ir, qbe, export_name) { scan = instrs[si] si = si + 1 if (is_text(scan)) { + label_pos[sanitize(scan)] = si - 1 // Labels reset dead code state (unless they're nop pseudo-labels) if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_")) scan_dead = false @@ -709,11 +809,11 @@ var qbe_emit = function(ir, qbe, export_name) { if (scan_dead) continue if (!is_array(scan)) continue scan_op = scan[0] - if (scan_op == "invoke" || scan_op == "tail_invoke") { + if (scan_op == "invoke") { invoke_count = invoke_count + 1 } // Track terminators — same set as in the main loop - if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "disrupt") { + if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "tail_invoke" || scan_op == "disrupt") { scan_dead = true } } @@ -795,11 +895,24 @@ var qbe_emit = function(ir, qbe, export_name) { emit(`@${lbl}_ok`) } + // Poll pause/interrupt state on taken backward jumps. + var emit_backedge_branch = function(target_label) { + var chk_lbl = fresh() + emit(` %${chk_lbl} =w call $cell_rt_check_backedge(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jnz %${chk_lbl}, @disruption_handler, @${target_label}`) + } else { + needs_exc_ret = true + emit(` jnz %${chk_lbl}, @_exc_ret, @${target_label}`) + } + } + // Walk instructions var last_was_term = false i = 0 while (i < length(instrs)) { instr = instrs[i] + instr_idx = i // Emit @disruption_handler at the right flat index // disruption_pc counts all entries (labels + instructions) @@ -909,18 +1022,117 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Generic arithmetic (VM dispatches int/float) --- if (op == "add") { - emit(` %fp =l call $__add_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_sum =l add %${p}_ai, %${p}_bi`) + emit(` %${p}_sumw =w copy %${p}_sum`) + emit(` %${p}_sumext =l extsw %${p}_sumw`) + emit(` %${p}_sum_ok =w ceql %${p}_sumext, %${p}_sum`) + emit(` jnz %${p}_sum_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_sum, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` %${p}_r =l call $cell_rt_add(l %ctx, l ${lhs}, l ${rhs})`) + emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) + chk = fresh() + emit(` %${chk} =w ceql %fp, 0`) + if (has_handler && !in_handler) { + emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) + } + emit(`@${chk}_ok`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "subtract") { - emit(` %fp =l call $__sub_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_diff =l sub %${p}_ai, %${p}_bi`) + emit(` %${p}_diffw =w copy %${p}_diff`) + emit(` %${p}_diffext =l extsw %${p}_diffw`) + emit(` %${p}_diff_ok =w ceql %${p}_diffext, %${p}_diff`) + emit(` jnz %${p}_diff_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_diff, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` %${p}_r =l call $qbe_float_sub(l %ctx, l ${lhs}, l ${rhs})`) + emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) + chk = fresh() + emit(` %${chk} =w ceql %fp, 0`) + if (has_handler && !in_handler) { + emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) + } + emit(`@${chk}_ok`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "multiply") { - emit(` %fp =l call $__mul_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_prod =l mul %${p}_ai, %${p}_bi`) + emit(` %${p}_prodw =w copy %${p}_prod`) + emit(` %${p}_prodext =l extsw %${p}_prodw`) + emit(` %${p}_prod_ok =w ceql %${p}_prodext, %${p}_prod`) + emit(` jnz %${p}_prod_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_prod, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` %${p}_r =l call $qbe_float_mul(l %ctx, l ${lhs}, l ${rhs})`) + emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) + chk = fresh() + emit(` %${chk} =w ceql %fp, 0`) + if (has_handler && !in_handler) { + emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) + } + emit(`@${chk}_ok`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "divide") { @@ -1003,27 +1215,93 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Comparisons (int path, no GC) --- if (op == "eq_int") { - emit(` call $__eq_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w ceqw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "ne_int") { - emit(` call $__ne_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w cnew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "lt_int") { - emit(` call $__lt_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csltw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "gt_int") { - emit(` call $__gt_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csgtw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "le_int") { - emit(` call $__le_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w cslew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "ge_int") { - emit(` call $__ge_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csgew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } @@ -1240,39 +1518,99 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Control flow --- if (op == "jump") { - emit(` jmp @${sanitize(a1)}`) + j_lbl = sanitize(a1) + j_idx = label_pos[j_lbl] + if (j_idx != null && j_idx < instr_idx) { + emit_backedge_branch(j_lbl) + } else { + emit(` jmp @${j_lbl}`) + } last_was_term = true continue } if (op == "jump_true") { v = s_read(a1) p = fresh() - emit(` %${p} =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`) + jt_lbl = sanitize(a2) + jt_idx = label_pos[jt_lbl] + jt_backedge = jt_idx != null && jt_idx < instr_idx + emit(` %${p}_is_true =w ceql ${v}, ${text(qbe.js_true)}`) + emit(` jnz %${p}_is_true, @${p}_take, @${p}_chk_fast`) + emit(`@${p}_chk_fast`) + emit(` %${p}_tag =l and ${v}, 31`) + emit(` %${p}_is_bool =w ceql %${p}_tag, 3`) + emit(` %${p}_is_null =w ceql %${p}_tag, 7`) + emit(` %${p}_is_falsey =w or %${p}_is_bool, %${p}_is_null`) + emit(` jnz %${p}_is_falsey, @${p}_f, @${p}_tb`) + emit(`@${p}_tb`) + emit(` %${p}_tbv =w call $JS_ToBool(l %ctx, l ${v})`) + emit(` jnz %${p}_tbv, @${p}_take, @${p}_f`) + emit(`@${p}_take`) + if (jt_backedge) { + emit_backedge_branch(jt_lbl) + } else { + emit(` jmp @${jt_lbl}`) + } emit(`@${p}_f`) continue } if (op == "jump_false") { v = s_read(a1) p = fresh() - emit(` %${p} =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`) + jf_lbl = sanitize(a2) + jf_idx = label_pos[jf_lbl] + jf_backedge = jf_idx != null && jf_idx < instr_idx + emit(` %${p}_is_true =w ceql ${v}, ${text(qbe.js_true)}`) + emit(` jnz %${p}_is_true, @${p}_t, @${p}_chk_fast`) + emit(`@${p}_chk_fast`) + emit(` %${p}_tag =l and ${v}, 31`) + emit(` %${p}_is_bool =w ceql %${p}_tag, 3`) + emit(` %${p}_is_null =w ceql %${p}_tag, 7`) + emit(` %${p}_is_fast_false =w or %${p}_is_bool, %${p}_is_null`) + emit(` jnz %${p}_is_fast_false, @${p}_take, @${p}_tb`) + emit(`@${p}_tb`) + emit(` %${p}_tbv =w call $JS_ToBool(l %ctx, l ${v})`) + emit(` jnz %${p}_tbv, @${p}_t, @${p}_take`) + emit(`@${p}_take`) + if (jf_backedge) { + emit_backedge_branch(jf_lbl) + } else { + emit(` jmp @${jf_lbl}`) + } emit(`@${p}_t`) continue } if (op == "jump_null") { v = s_read(a1) p = fresh() + jn_lbl = sanitize(a2) + jn_idx = label_pos[jn_lbl] + jn_backedge = jn_idx != null && jn_idx < instr_idx emit(` %${p} =w ceql ${v}, ${text(qbe.js_null)}`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_nn`) + if (jn_backedge) { + emit(` jnz %${p}, @${p}_bn, @${p}_nn`) + emit(`@${p}_bn`) + emit_backedge_branch(jn_lbl) + } else { + emit(` jnz %${p}, @${jn_lbl}, @${p}_nn`) + } emit(`@${p}_nn`) continue } if (op == "jump_not_null") { v = s_read(a1) p = fresh() + jnn_lbl = sanitize(a2) + jnn_idx = label_pos[jnn_lbl] + jnn_backedge = jnn_idx != null && jnn_idx < instr_idx emit(` %${p} =w cnel ${v}, ${text(qbe.js_null)}`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_n`) + if (jnn_backedge) { + emit(` jnz %${p}, @${p}_bn, @${p}_n`) + emit(`@${p}_bn`) + emit_backedge_branch(jnn_lbl) + } else { + emit(` jnz %${p}, @${jnn_lbl}, @${p}_n`) + } emit(`@${p}_n`) continue } @@ -1316,26 +1654,14 @@ var qbe_emit = function(ir, qbe, export_name) { continue } if (op == "tail_invoke") { - // Same as invoke — dispatch loop regular call with resume - seg_counter = seg_counter + 1 - seg_num = seg_counter - resume_val = seg_num * 65536 + a2 - emit(` %_tinv_addr${text(seg_num)} =l sub %fp, 8`) - emit(` storel ${text(resume_val * 2)}, %_tinv_addr${text(seg_num)}`) - emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) - emit(" ret 0") - emit(`@_seg${text(seg_num)}`) - // Check for exception after dispatch loop resumes us + // Tail call: hand control to dispatch loop and do not resume this segment. + // Use 0xFFFF as ret_slot (no result writeback into current frame). p = fresh() - emit(` %${p} =w call $JS_HasException(l %ctx)`) - if (has_handler && !in_handler) { - emit(` jnz %${p}, @disruption_handler, @${p}_ok`) - } else { - needs_exc_ret = true - emit(` jnz %${p}, @_exc_ret, @${p}_ok`) - } - emit(`@${p}_ok`) - last_was_term = false + emit(` %${p}_addr =l sub %fp, 8`) + emit(` storel ${text(65535 * 2)}, %${p}_addr`) + emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) + emit(" ret 0") + last_was_term = true continue } if (op == "goframe") { diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 547f3d2f..884f4cc6 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -9,6 +9,14 @@ #include "quickjs-internal.h" #include #include +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#define CELL_THREAD_LOCAL __thread +#else +#define CELL_THREAD_LOCAL _Thread_local +#endif /* Non-inline wrappers for static inline functions in quickjs.h */ JSValue qbe_new_float64(JSContext *ctx, double d) { @@ -278,8 +286,8 @@ void cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, /* Native module environment — set before executing a native module's cell_main. Contains runtime functions (starts_with, ends_with, etc.) and use(). */ -static JSGCRef g_native_env_ref; -static int g_has_native_env = 0; +static CELL_THREAD_LOCAL JSGCRef g_native_env_ref; +static CELL_THREAD_LOCAL int g_has_native_env = 0; void cell_rt_set_native_env(JSContext *ctx, JSValue env) { if (!JS_IsNull(env) && !JS_IsStone(env)) { @@ -370,18 +378,58 @@ void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth, update the current frame pointer when it moves objects. cell_rt_refresh_fp re-derives the slot pointer after any GC call. */ -#define MAX_AOT_DEPTH 8192 -static JSGCRef g_aot_gc_refs[MAX_AOT_DEPTH]; -static int g_aot_depth = 0; +// Keep GC roots for native frames in stable heap chunks (no fixed depth cap). +#define AOT_GC_REF_CHUNK_SIZE 1024 +typedef struct AOTGCRefChunk { + JSGCRef refs[AOT_GC_REF_CHUNK_SIZE]; +} AOTGCRefChunk; + +static CELL_THREAD_LOCAL AOTGCRefChunk **g_aot_gc_ref_chunks = NULL; +static CELL_THREAD_LOCAL int g_aot_gc_ref_chunk_count = 0; +static CELL_THREAD_LOCAL int g_aot_depth = 0; + +int cell_rt_native_active(void) { + return g_aot_depth > 0; +} + +static int ensure_aot_gc_ref_slot(JSContext *ctx, int depth_index) { + if (depth_index < 0) + return 0; + int needed_chunks = (depth_index / AOT_GC_REF_CHUNK_SIZE) + 1; + if (needed_chunks <= g_aot_gc_ref_chunk_count) + return 1; + AOTGCRefChunk **new_chunks = + (AOTGCRefChunk **)realloc(g_aot_gc_ref_chunks, + (size_t)needed_chunks * sizeof(*new_chunks)); + if (!new_chunks) { + JS_ThrowOutOfMemory(ctx); + return 0; + } + g_aot_gc_ref_chunks = new_chunks; + for (int i = g_aot_gc_ref_chunk_count; i < needed_chunks; i++) { + g_aot_gc_ref_chunks[i] = (AOTGCRefChunk *)calloc(1, sizeof(AOTGCRefChunk)); + if (!g_aot_gc_ref_chunks[i]) { + JS_ThrowOutOfMemory(ctx); + return 0; + } + } + g_aot_gc_ref_chunk_count = needed_chunks; + return 1; +} + +static inline JSGCRef *aot_gc_ref_at(int depth_index) { + int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; + int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; + return &g_aot_gc_ref_chunks[chunk_index]->refs[slot_index]; +} JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { - if (g_aot_depth >= MAX_AOT_DEPTH) { - JS_ThrowTypeError(ctx, "native call stack overflow (depth %d)", g_aot_depth); + if (!ensure_aot_gc_ref_slot(ctx, g_aot_depth)) { return NULL; } JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots); if (!frame) return NULL; - JSGCRef *ref = &g_aot_gc_refs[g_aot_depth]; + JSGCRef *ref = aot_gc_ref_at(g_aot_depth); JS_AddGCRef(ctx, ref); ref->val = JS_MKPTR(frame); g_aot_depth++; @@ -394,7 +442,7 @@ JSValue *cell_rt_refresh_fp(JSContext *ctx) { fprintf(stderr, "[BUG] cell_rt_refresh_fp: g_aot_depth=%d\n", g_aot_depth); abort(); } - JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; + JSValue val = aot_gc_ref_at(g_aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp: frame is NULL at depth=%d val=%lld\n", @@ -412,7 +460,7 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: g_aot_depth=%d\n", g_aot_depth); abort(); } - JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; + JSValue val = aot_gc_ref_at(g_aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: frame is NULL\n"); @@ -422,8 +470,12 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { } void cell_rt_leave_frame(JSContext *ctx) { + if (g_aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_leave_frame underflow\n"); + abort(); + } g_aot_depth--; - JS_DeleteGCRef(ctx, &g_aot_gc_refs[g_aot_depth]); + JS_DeleteGCRef(ctx, aot_gc_ref_at(g_aot_depth)); } /* --- Function creation and calling --- */ @@ -432,7 +484,7 @@ typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); /* Set before executing a native module's cell_main — used by cell_rt_make_function to resolve fn_ptr via dlsym */ -static void *g_current_dl_handle = NULL; +static CELL_THREAD_LOCAL void *g_current_dl_handle = NULL; /* ============================================================ Dispatch loop — the core of native function execution. @@ -442,8 +494,20 @@ static void *g_current_dl_handle = NULL; /* Pending call state — set by cell_rt_signal_call / cell_rt_signal_tail_call, read by the dispatch loop. */ -static JSValue g_pending_callee_frame = 0; /* JSFrameRegister ptr */ -static int g_pending_is_tail = 0; +static CELL_THREAD_LOCAL JSValue g_pending_callee_frame = 0; /* JSFrameRegister ptr */ +static CELL_THREAD_LOCAL int g_pending_is_tail = 0; + +/* Poll pause state on taken backward jumps (AOT backedges). + MACH can suspend/resume a register VM frame at pc granularity; native AOT + does not currently have an equivalent resume point, so we acknowledge timer + pauses by clearing pause_flag and continuing the current turn. */ +int cell_rt_check_backedge(JSContext *ctx) { + int pf = atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed); + if (pf >= 1) { + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); + } + return 0; +} void cell_rt_signal_call(JSContext *ctx, void *fp, int64_t frame_slot) { (void)ctx; @@ -467,6 +531,15 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, cell_compiled_fn fn = (cell_compiled_fn)f->u.native.fn_ptr; int nr_slots = f->u.native.nr_slots; int arity = f->length; + void *prev_dl_handle = g_current_dl_handle; + g_current_dl_handle = f->u.native.dl_handle; + +#define RETURN_DISPATCH(v) \ + do { \ + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); \ + g_current_dl_handle = prev_dl_handle; \ + return (v); \ + } while (0) /* Root func_obj across allocation — GC can move it */ JSGCRef func_ref; @@ -477,7 +550,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSValue *fp = cell_rt_enter_frame(ctx, nr_slots); if (!fp) { JS_PopGCRef(ctx, &func_ref); - return JS_EXCEPTION; + RETURN_DISPATCH(JS_EXCEPTION); } /* Re-derive func_obj after potential GC */ @@ -499,11 +572,25 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, for (;;) { g_pending_callee_frame = 0; + g_pending_is_tail = 0; + if (atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed) >= 1) + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); + + /* Keep closure creation bound to the currently executing native module. */ + if (JS_IsFunction(frame->function)) { + JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); + if (cur_fn->kind == JS_FUNC_KIND_NATIVE) + g_current_dl_handle = cur_fn->u.native.dl_handle; + } JSValue result = fn(ctx, fp); /* Re-derive frame after potential GC */ - JSValue frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + if (g_aot_depth <= 0) { + fprintf(stderr, "[BUG] native dispatch lost frame depth after fn call\n"); + abort(); + } + JSValue frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; @@ -511,7 +598,12 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* Function signaled a call — dispatch it */ JSValue callee_frame_val = g_pending_callee_frame; g_pending_callee_frame = 0; - JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_frame_val); + int pending_is_tail = g_pending_is_tail; + g_pending_is_tail = 0; + JSGCRef callee_ref; + JS_PushGCRef(ctx, &callee_ref); + callee_ref.val = callee_frame_val; + JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); int callee_argc = (int)objhdr_cap56(callee_fr->header); callee_argc = (callee_argc >= 2) ? callee_argc - 2 : 0; JSValue callee_fn_val = callee_fr->function; @@ -521,95 +613,86 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* Resume caller with exception pending */ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_ref); continue; } - JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(callee_fn_val); + JSGCRef callee_fn_ref; + JS_PushGCRef(ctx, &callee_fn_ref); + callee_fn_ref.val = callee_fn_val; + JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(callee_fn_ref.val); if (callee_fn->kind == JS_FUNC_KIND_NATIVE) { /* Native-to-native call — no C stack growth */ cell_compiled_fn callee_ptr = (cell_compiled_fn)callee_fn->u.native.fn_ptr; int callee_slots = callee_fn->u.native.nr_slots; - if (g_pending_is_tail) { - /* Tail call: reuse or replace current frame */ - if (callee_slots <= (int)objhdr_cap56(frame->header)) { - /* Reuse current frame */ - int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; - if (cc < 0) cc = callee_argc; - frame->slots[0] = callee_fr->slots[0]; /* this */ - for (int i = 0; i < cc && i < callee_slots - 1; i++) - frame->slots[1 + i] = callee_fr->slots[1 + i]; - /* Null out remaining slots */ - int cur_slots = (int)objhdr_cap56(frame->header); - for (int i = 1 + cc; i < cur_slots; i++) - frame->slots[i] = JS_NULL; - frame->function = callee_fn_val; - frame->address = JS_NewInt32(ctx, 0); - fn = callee_ptr; - /* fp stays the same (same frame) */ - } else { - /* Need bigger frame — save callee info, pop+push */ - JSValue saved_caller = frame->caller; - JSValue callee_this = callee_fr->slots[0]; - int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; - if (cc < 0) cc = callee_argc; - JSValue callee_args[cc > 0 ? cc : 1]; - for (int i = 0; i < cc; i++) - callee_args[i] = callee_fr->slots[1 + i]; + if (pending_is_tail) { + /* Tail call: replace frame instead of mutating in place. + In-place reuse breaks closures that captured the caller frame. */ + JSValue saved_caller = frame->caller; + int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; + if (cc < 0) cc = callee_argc; - /* Pop old frame */ - cell_rt_leave_frame(ctx); + /* Pop old frame */ + cell_rt_leave_frame(ctx); - /* Push new right-sized frame */ - JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); - if (!new_fp) - return JS_EXCEPTION; - JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); - new_frame->function = callee_fn_val; - new_frame->caller = saved_caller; - new_frame->slots[0] = callee_this; - for (int i = 0; i < cc && i < callee_slots - 1; i++) - new_frame->slots[1 + i] = callee_args[i]; - frame = new_frame; - fp = new_fp; - fn = callee_ptr; + /* Push new right-sized frame */ + JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); + if (!new_fp) { + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(JS_EXCEPTION); } + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); + callee_fn_val = callee_fn_ref.val; + new_frame->function = callee_fn_val; + new_frame->caller = saved_caller; + new_frame->slots[0] = callee_fr->slots[0]; + for (int i = 0; i < cc && i < callee_slots - 1; i++) + new_frame->slots[1 + i] = callee_fr->slots[1 + i]; + frame = new_frame; + fp = new_fp; + fn = callee_ptr; } else { /* Regular call: push new frame, link caller */ int ret_info = JS_VALUE_GET_INT(frame->address); int resume_seg = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; - /* Save callee info before allocation */ - JSValue callee_this = callee_fr->slots[0]; int cc = (callee_argc < callee_fn->length) ? callee_argc : callee_fn->length; if (cc < 0) cc = callee_argc; - JSValue callee_args[cc > 0 ? cc : 1]; - for (int i = 0; i < cc; i++) - callee_args[i] = callee_fr->slots[1 + i]; JSValue *new_fp = cell_rt_enter_frame(ctx, callee_slots); if (!new_fp) { /* Resume caller with exception pending */ - frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); continue; } + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); /* Re-derive caller frame after alloc */ - frame_val = g_aot_gc_refs[g_aot_depth - 2].val; + if (g_aot_depth <= 1) { + fprintf(stderr, "[BUG] native dispatch bad depth while linking caller: %d\n", g_aot_depth); + abort(); + } + frame_val = aot_gc_ref_at(g_aot_depth - 2)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); JSFrameRegister *new_frame = (JSFrameRegister *)((char *)new_fp - offsetof(JSFrameRegister, slots)); + callee_fn_val = callee_fn_ref.val; new_frame->function = callee_fn_val; new_frame->caller = JS_MKPTR(frame); - new_frame->slots[0] = callee_this; + new_frame->slots[0] = callee_fr->slots[0]; for (int i = 0; i < cc && i < callee_slots - 1; i++) - new_frame->slots[1 + i] = callee_args[i]; + new_frame->slots[1 + i] = callee_fr->slots[1 + i]; /* Save return address in caller */ frame->address = JS_NewInt32(ctx, (resume_seg << 16) | ret_slot); @@ -630,7 +713,7 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, callee_argc, &callee_fr->slots[1], 0); /* Re-derive frame after call */ - frame_val = g_aot_gc_refs[g_aot_depth - 1].val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; @@ -643,28 +726,29 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, Just resume it — it will detect the exception. */ JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); fn = (cell_compiled_fn)exc_fn->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_ref); continue; } /* Clear stale exception */ if (JS_HasException(ctx)) JS_GetException(ctx); - if (g_pending_is_tail) { + if (pending_is_tail) { /* Tail call to non-native: return its result up the chain */ /* Pop current frame and return to caller */ if (g_aot_depth <= base_depth) { cell_rt_leave_frame(ctx); - return ret; + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(ret); } /* Pop current frame, return to caller frame */ - JSValue caller_val = frame->caller; cell_rt_leave_frame(ctx); - if (JS_IsNull(caller_val) || g_aot_depth < base_depth) { - return ret; + if (g_aot_depth < base_depth) { + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(ret); } - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(caller_val); - /* Update GC ref to point to caller */ - g_aot_gc_refs[g_aot_depth - 1].val = caller_val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); int ret_slot = ret_info & 0xFFFF; @@ -684,6 +768,8 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fn = (cell_compiled_fn)cur_fn->u.native.fn_ptr; } } + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); continue; } @@ -696,19 +782,16 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, if (g_aot_depth <= base_depth) { cell_rt_leave_frame(ctx); - return JS_EXCEPTION; + RETURN_DISPATCH(JS_EXCEPTION); } - - JSValue exc_caller_val = frame->caller; cell_rt_leave_frame(ctx); - - if (JS_IsNull(exc_caller_val) || g_aot_depth < base_depth) { - return JS_EXCEPTION; + if (g_aot_depth < base_depth) { + RETURN_DISPATCH(JS_EXCEPTION); } /* Resume caller — it will check JS_HasException and branch to handler */ - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(exc_caller_val); - g_aot_gc_refs[g_aot_depth - 1].val = exc_caller_val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; JSFunction *exc_caller_fn = JS_VALUE_GET_FUNCTION(frame->function); @@ -719,19 +802,16 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, /* Normal return — pop frame and store result in caller */ if (g_aot_depth <= base_depth) { cell_rt_leave_frame(ctx); - return result; + RETURN_DISPATCH(result); } - - JSValue caller_val = frame->caller; cell_rt_leave_frame(ctx); - - if (JS_IsNull(caller_val) || g_aot_depth < base_depth) { - return result; + if (g_aot_depth < base_depth) { + RETURN_DISPATCH(result); } /* Return to caller frame */ - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(caller_val); - g_aot_gc_refs[g_aot_depth - 1].val = caller_val; + frame_val = aot_gc_ref_at(g_aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); fp = (JSValue *)frame->slots; int ret_info = JS_VALUE_GET_INT(frame->address); int ret_slot = ret_info & 0xFFFF; @@ -742,6 +822,8 @@ JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, fn = (cell_compiled_fn)caller_fn->u.native.fn_ptr; continue; } + +#undef RETURN_DISPATCH } /* Create a native function object from a compiled fn_idx. @@ -761,7 +843,7 @@ JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp, /* Get the current frame as outer_frame for closures */ JSValue outer_frame = JS_NULL; if (g_aot_depth > 0) - outer_frame = g_aot_gc_refs[g_aot_depth - 1].val; + outer_frame = aot_gc_ref_at(g_aot_depth - 1)->val; return js_new_native_function(ctx, fn_ptr, g_current_dl_handle, (uint16_t)nr_slots, (int)nr_args, outer_frame); diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index cc70c859..ad5ffcf7 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -1545,9 +1545,14 @@ static inline void set_value (JSContext *ctx, JSValue *pval, JSValue new_val) { } void JS_ThrowInterrupted (JSContext *ctx); +int cell_rt_native_active(void); static inline __exception int js_poll_interrupts (JSContext *ctx) { if (unlikely (atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2)) { + if (cell_rt_native_active ()) { + atomic_store_explicit (&ctx->pause_flag, 0, memory_order_relaxed); + return 0; + } JS_ThrowInterrupted (ctx); return -1; } diff --git a/source/runtime.c b/source/runtime.c index e378e0c2..72f706e9 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -5366,6 +5366,10 @@ JSValue js_regexp_toString (JSContext *ctx, JSValue this_val, int argc, JSValue int lre_check_timeout (void *opaque) { JSContext *ctx = opaque; + if (cell_rt_native_active ()) { + atomic_store_explicit (&ctx->pause_flag, 0, memory_order_relaxed); + return 0; + } return atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2; }