diff --git a/bench.ce b/bench.ce index eb63782e..464193b5 100644 --- a/bench.ce +++ b/bench.ce @@ -523,9 +523,9 @@ function run_benchmarks(package_name, specific_bench) { // Find matching native bench and run it nat_b = find(native_benches, function(nb) { return nb.name == b.name }) - if (nat_b) { + if (nat_b != null) { _run_nat = function() { - nat_result = run_single_bench(nat_b.fn, b.name) + nat_result = run_single_bench(native_benches[nat_b].fn, b.name) nat_result.package = pkg_result.package nat_result.mode = "native" push(file_result.benchmarks, nat_result) diff --git a/meson.build b/meson.build index a9a953c2..17b96f7a 100644 --- a/meson.build +++ b/meson.build @@ -38,6 +38,8 @@ if host_machine.system() == 'darwin' foreach fkit : fworks deps += dependency('appleframeworks', modules: fkit) endforeach + # 32MB stack for deep native recursion (CPS patterns without TCO) + link += ['-Wl,-stack_size,0x2000000'] endif if host_machine.system() == 'playdate' diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index b9ab5bb0..4f189280 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -8,6 +8,7 @@ #include "quickjs-internal.h" #include +#include /* Non-inline wrappers for static inline functions in quickjs.h */ JSValue qbe_new_float64(JSContext *ctx, double d) { @@ -338,7 +339,8 @@ JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth, if GC moved the frame. */ int magic = (int)(int64_t)frame[QBE_FRAME_OUTER_SLOT]; frame = derive_outer_fp(magic); - if (!frame) return JS_NULL; + if (!frame) + return JS_NULL; } return frame[slot]; } @@ -359,13 +361,31 @@ void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth, update frame pointers when it moves objects. cell_rt_refresh_fp re-derives the slot pointer after any GC-triggering call. 
*/ -#define MAX_AOT_DEPTH 256 +#define MAX_AOT_DEPTH 65536 static JSGCRef g_aot_gc_refs[MAX_AOT_DEPTH]; static int g_aot_depth = 0; +/* Report whether the C stack still has headroom, so deep recursion fails cleanly instead of segfaulting */ +static int stack_space_ok(void) { +#ifdef __APPLE__ + /* The reported stack address is the highest one; the stack grows down from it */ + pthread_t self = pthread_self(); + uintptr_t stack_top = (uintptr_t)pthread_get_stackaddr_np(self); + uintptr_t stack_low = stack_top - pthread_get_stacksize_np(self); + /* The address of a local stands in for the current stack position */ + char probe; + /* Demand more than 128KB of headroom so unwinding and error handling still fit */ + return ((uintptr_t)&probe - stack_low) > (128 * 1024); +#else + return g_aot_depth < MAX_AOT_DEPTH; +#endif +} + JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { - if (g_aot_depth >= MAX_AOT_DEPTH) + if (g_aot_depth >= MAX_AOT_DEPTH || !stack_space_ok()) { + JS_ThrowTypeError(ctx, "native call stack overflow (depth %d)", g_aot_depth); return NULL; + } JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots); if (!frame) return NULL; JSGCRef *ref = &g_aot_gc_refs[g_aot_depth]; @@ -423,7 +443,7 @@ typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); Each native .cm module gets its own dylib. When a module creates closures via cell_rt_make_function, we record the dylib handle so the trampoline can look up the correct cell_fn_N in the right dylib. 
*/ -#define MAX_NATIVE_FN 4096 +#define MAX_NATIVE_FN 32768 static struct { void *dl_handle; @@ -447,6 +467,17 @@ static JSValue *derive_outer_fp(int magic) { return (JSValue *)frame->slots; } +static void reclaim_native_fns(JSContext *ctx, int saved_count) { + /* Release the frame GC ref of each registry entry at or past saved_count, then truncate the registry to saved_count entries. NOTE(review): presumably assumes no closure registered during the call is still reachable afterwards; confirm against cell_rt_make_function */ + for (int slot = saved_count; slot < g_native_fn_count; slot++) { + if (!g_native_fn_registry[slot].has_frame_ref) + continue; + JS_DeleteGCRef(ctx, &g_native_fn_registry[slot].frame_ref); + g_native_fn_registry[slot].has_frame_ref = 0; + } + g_native_fn_count = saved_count; +} + static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val, int argc, JSValue *argv, int magic) { if (magic < 0 || magic >= g_native_fn_count) @@ -474,10 +505,31 @@ static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val, surviving GC moves */ fp[QBE_FRAME_OUTER_SLOT] = (JSValue)(int64_t)magic; + /* Set g_current_dl_handle so any closures created during this call + (e.g. inner functions returned by factory functions) are registered + against the correct dylib */ + void *prev_handle = g_current_dl_handle; + g_current_dl_handle = handle; + + /* At top-level (depth 1 = this is the outermost native call), + save the fn count so we can reclaim temporary closures after */ + int saved_fn_count = (g_aot_depth == 1) ? g_native_fn_count : -1; + JSValue result = fn(ctx, fp); cell_rt_leave_frame(ctx); - if (result == JS_EXCEPTION) + g_current_dl_handle = prev_handle; + + /* Reclaim temporary closures created during this top-level call */ + if (saved_fn_count >= 0) + reclaim_native_fns(ctx, saved_fn_count); + + if (result == JS_EXCEPTION) { + /* Ensure there is a pending exception. QBE @_exc_ret returns 15 + but may not have set one (e.g. if cell_rt_enter_frame failed). */ + if (!JS_HasException(ctx)) + JS_Throw(ctx, JS_NULL); return JS_EXCEPTION; + } return result; }