diff --git a/bench.ce b/bench.ce index beae45f4..9638ad3e 100644 --- a/bench.ce +++ b/bench.ce @@ -417,17 +417,25 @@ function format_ops(ops) { // Load a module for benchmarking in the given mode // Returns the module value, or null on failure -function load_bench_module(f, package_name, mode) { +function resolve_bench_load(f, package_name) { var mod_path = text(f, 0, -3) var use_pkg = package_name ? package_name : fd.realpath('.') - var prefix = null - var src_path = null + var prefix = testlib.get_pkg_dir(package_name) + var src_path = prefix + '/' + f + return {mod_path, use_pkg, src_path} +} + +function load_bench_module_native(f, package_name) { + var r = resolve_bench_load(f, package_name) + return shop.use_native(r.src_path, r.use_pkg) +} + +function load_bench_module(f, package_name, mode) { + var r = resolve_bench_load(f, package_name) if (mode == "native") { - prefix = testlib.get_pkg_dir(package_name) - src_path = prefix + '/' + f - return shop.use_native(src_path, use_pkg) + return load_bench_module_native(f, package_name) } - return shop.use(mod_path, use_pkg) + return shop.use(r.mod_path, r.use_pkg) } // Collect benchmark functions from a loaded module diff --git a/build.cm b/build.cm index 6deba3d5..5767fa53 100644 --- a/build.cm +++ b/build.cm @@ -80,6 +80,14 @@ function content_hash(str) { return text(crypto.blake2(bb, 32), 'h') } +// Enable AOT ASan by creating .cell/asan_aot in the package root. 
+function native_sanitize_flags() { + if (fd.is_file('.cell/asan_aot')) { + return ' -fsanitize=address -fno-omit-frame-pointer' + } + return '' +} + // ============================================================================ // Cache key salts — canonical registry // Every artifact type has a unique salt so hash collisions between types @@ -102,6 +110,10 @@ function manifest_path(pkg) { return get_build_dir() + '/' + content_hash(pkg + '\n' + 'manifest') } +function native_cache_content(src, target, san_flags) { + return src + '\n' + target + '\nnative\n' + (san_flags || '') +} + function get_build_dir() { return shop.get_build_dir() } @@ -365,17 +377,17 @@ function compute_link_content(objects, ldflags, target_ldflags, opts) { // Build a string representing all link inputs var parts = [] - push(parts, 'target:' + opts.target) - push(parts, 'cc:' + opts.cc) + push(parts, 'target:' + text(opts.target)) + push(parts, 'cc:' + text(opts.cc)) arrfor(sorted_objects, function(obj) { // Object paths are content-addressed, so the path itself is the hash - push(parts, 'obj:' + obj) + push(parts, 'obj:' + text(obj)) }) arrfor(ldflags, function(flag) { - push(parts, 'ldflag:' + flag) + push(parts, 'ldflag:' + text(flag)) }) arrfor(target_ldflags, function(flag) { - push(parts, 'target_ldflag:' + flag) + push(parts, 'target_ldflag:' + text(flag)) }) return text(parts, '\n') @@ -449,9 +461,9 @@ Build.build_module_dylib = function(pkg, file, target, opts) { } push(cmd_parts, '-L"' + local_dir + '"') - push(cmd_parts, '"' + obj + '"') + push(cmd_parts, '"' + text(obj) + '"') arrfor(_extra, function(extra_obj) { - push(cmd_parts, '"' + extra_obj + '"') + if (extra_obj != null) push(cmd_parts, '"' + text(extra_obj) + '"') }) cmd_parts = array(cmd_parts, resolved_ldflags) cmd_parts = array(cmd_parts, target_ldflags) @@ -491,10 +503,12 @@ Build.build_dynamic = function(pkg, target, buildtype, opts) { // Compile support sources to cached objects var sources = 
pkg_tools.get_sources(pkg) var support_objects = [] - arrfor(sources, function(src_file) { - var obj = Build.compile_file(pkg, src_file, _target, {buildtype: _buildtype, cflags: cached_cflags, verbose: _opts.verbose}) - push(support_objects, obj) - }) + if (pkg != 'core') { + arrfor(sources, function(src_file) { + var obj = Build.compile_file(pkg, src_file, _target, {buildtype: _buildtype, cflags: cached_cflags, verbose: _opts.verbose}) + if (obj != null) push(support_objects, obj) + }) + } arrfor(c_files, function(file) { var sym_name = shop.c_symbol_for_file(pkg, file) @@ -610,62 +624,22 @@ Build.build_static = function(packages, target, output, buildtype) { // il_parts: {data: text, functions: [text, ...]} // cc: C compiler path // tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_) -function compile_native_batched(il_parts, cc, tmp_prefix) { - var nfuncs = length(il_parts.functions) - var nbatch = 8 - var o_paths = [] - var s_paths = [] - var asm_cmds = [] - var batch_fns = null - var batch_il = null - var asm_text = null - var s_path = null - var o_path = null - var end = 0 - var bi = 0 - var fi = 0 - var ai = 0 - var rc = null - var parallel_cmd = null +function compile_native_single(il_parts, cc, tmp_prefix, extra_flags) { + var _extra = extra_flags || '' var helpers_il = (il_parts.helpers && length(il_parts.helpers) > 0) ? text(il_parts.helpers, "\n") : "" - var prefix = null - - if (nfuncs < nbatch) nbatch = nfuncs - if (nbatch < 1) nbatch = 1 - - // Generate .s files: run QBE on each batch - while (bi < nbatch) { - batch_fns = [] - end = nfuncs * (bi + 1) / nbatch - while (fi < end) { - batch_fns[] = il_parts.functions[fi] - fi = fi + 1 - } - // Batch 0 includes helper functions; others reference them as external symbols - prefix = (bi == 0 && helpers_il != "") ? 
helpers_il + "\n\n" : "" - batch_il = il_parts.data + "\n\n" + prefix + text(batch_fns, "\n") - asm_text = os.qbe(batch_il) - s_path = tmp_prefix + '_b' + text(bi) + '.s' - o_path = tmp_prefix + '_b' + text(bi) + '.o' - fd.slurpwrite(s_path, stone(blob(asm_text))) - s_paths[] = s_path - o_paths[] = o_path - bi = bi + 1 - } - - // Assemble all batches in parallel - while (ai < length(s_paths)) { - asm_cmds[] = cc + ' -c ' + s_paths[ai] + ' -o ' + o_paths[ai] - ai = ai + 1 - } - parallel_cmd = text(asm_cmds, ' & ') + ' & wait' - rc = os.system(parallel_cmd) + var all_fns = text(il_parts.functions, "\n") + var full_il = il_parts.data + "\n\n" + helpers_il + "\n\n" + all_fns + var asm_text = os.qbe(full_il) + var s_path = tmp_prefix + '.s' + var o_path = tmp_prefix + '.o' + var rc = null + fd.slurpwrite(s_path, stone(blob(asm_text))) + rc = os.system(cc + _extra + ' -c ' + s_path + ' -o ' + o_path) if (rc != 0) { - print('Parallel assembly failed'); disrupt + print('Assembly failed'); disrupt } - - return o_paths + return [o_path] } // Post-process QBE IL: insert dead labels after ret/jmp (QBE requirement) @@ -710,6 +684,8 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var tc = toolchains[_target] var cc = tc.c + var san_flags = native_sanitize_flags() + var san_suffix = length(san_flags) > 0 ? 
'_asan' : '' // Step 1: Compile through pipeline var optimized = shop.compile_file(src_path) @@ -724,31 +700,33 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) // Content hash for cache key + var src = text(fd.slurp(src_path)) + var native_key = native_cache_content(src, _target, san_flags) var build_dir = get_build_dir() ensure_dir(build_dir) - var dylib_path = cache_path(text(fd.slurp(src_path)) + '\n' + _target, SALT_NATIVE) + var dylib_path = cache_path(native_key, SALT_NATIVE) if (fd.is_file(dylib_path)) return dylib_path // Compile and assemble via batched parallel pipeline - var tmp = '/tmp/cell_native_' + content_hash(src_path) - var rt_o_path = '/tmp/cell_qbe_rt.o' + var tmp = '/tmp/cell_native_' + content_hash(native_key) + var rt_o_path = '/tmp/cell_qbe_rt' + san_suffix + '.o' - var o_paths = compile_native_batched(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp, san_flags) // Compile QBE runtime stubs if needed var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' - rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') + rc = os.system(cc + san_flags + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') if (rc != 0) { print('QBE runtime stubs compilation failed'); disrupt } } // Link dylib - var link_cmd = cc + ' -shared -fPIC' + var link_cmd = cc + san_flags + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { @@ -782,6 +760,8 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var tc = toolchains[_target] var cc = tc.c + var san_flags = native_sanitize_flags() + var san_suffix = length(san_flags) > 0 ? 
'_asan' : '' var qbe_macros = use('qbe') var qbe_emit = use('qbe_emit') @@ -793,31 +773,32 @@ Build.compile_native_ir = function(optimized, src_path, opts) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) var src = text(fd.slurp(src_path)) + var native_key = native_cache_content(src, _target, san_flags) var build_dir = get_build_dir() ensure_dir(build_dir) - var dylib_path = cache_path(src + '\n' + _target, SALT_NATIVE) + var dylib_path = cache_path(native_key, SALT_NATIVE) if (fd.is_file(dylib_path)) return dylib_path // Compile and assemble via batched parallel pipeline - var tmp = '/tmp/cell_native_' + content_hash(src_path) - var rt_o_path = '/tmp/cell_qbe_rt.o' + var tmp = '/tmp/cell_native_' + content_hash(native_key) + var rt_o_path = '/tmp/cell_qbe_rt' + san_suffix + '.o' - var o_paths = compile_native_batched(il_parts, cc, tmp) + var o_paths = compile_native_single(il_parts, cc, tmp, san_flags) // Compile QBE runtime stubs if needed var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' - rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') + rc = os.system(cc + san_flags + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') if (rc != 0) { print('QBE runtime stubs compilation failed'); disrupt } } // Link dylib - var link_cmd = cc + ' -shared -fPIC' + var link_cmd = cc + san_flags + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { @@ -968,5 +949,7 @@ Build.SALT_DEPS = SALT_DEPS Build.SALT_FAIL = SALT_FAIL Build.cache_path = cache_path Build.manifest_path = manifest_path +Build.native_sanitize_flags = native_sanitize_flags +Build.native_cache_content = native_cache_content return Build diff --git a/docs/cli.md b/docs/cli.md index c07c207a..907b1d6d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -168,6 +168,9 @@ pit bench # run specific benchmark file pit bench package # benchmark a named package pit 
bench package # specific benchmark in a package pit bench package all # benchmark all packages +pit bench --bytecode # force bytecode-only benchmark run +pit bench --native # force native-only benchmark run +pit bench --compare # run bytecode and native side-by-side ``` Output includes median, mean, standard deviation, and percentiles for each benchmark. diff --git a/docs/shop.md b/docs/shop.md index 180cd144..73963074 100644 --- a/docs/shop.md +++ b/docs/shop.md @@ -73,7 +73,7 @@ use('gitea.pockle.world/john/renderer/sprite') ## Compilation and Caching -Every module goes through a content-addressed caching pipeline. The cache key is the BLAKE2 hash of the source content, so changing the source automatically invalidates the cache. +Every module goes through a content-addressed caching pipeline. Cache keys are based on the inputs that affect the output artifact, so changing any relevant input automatically invalidates the cache. ### Cache Hierarchy @@ -124,6 +124,8 @@ Dylibs live at content-addressed paths (`~/.cell/build/`) that can only be At runtime, when `use()` needs a C module from another package, the shop reads the manifest to find the dylib path. This means `cell build` must be run before C modules from packages can be loaded. +For native `.cm` dylibs, the cache content includes source, target, native mode marker, and sanitize flags, then uses the `native` salt. Changing any of those inputs produces a new cache path automatically. + ### Core Module Caching Core modules loaded via `use_core()` in engine.cm follow the same content-addressed pattern. On first use, a module is compiled from source and cached by the BLAKE2 hash of its source content. Subsequent loads with unchanged source hit the cache directly. 
diff --git a/docs/testing.md b/docs/testing.md index ea7d2cfc..38e98e48 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -118,6 +118,45 @@ When a mismatch is found: MISMATCH: test_foo: result mismatch opt=42 noopt=43 ``` +## ASAN for Native AOT + +When debugging native (`shop.use_native`) crashes, there are two useful sanitizer workflows. + +### 1) AOT-only sanitizer (fastest loop) + +Enable sanitizer flags for generated native modules by creating a marker file: + +```bash +touch .cell/asan_aot +cell --dev bench --native fibonacci +``` + +This adds `-fsanitize=address -fno-omit-frame-pointer` to AOT module compilation. + +Disable it with: + +```bash +rm -f .cell/asan_aot +``` + +### 2) Full runtime sanitizer (CLI + runtime + AOT) + +Build an ASAN-instrumented `cell` binary: + +```bash +meson setup build-asan -Dbuildtype=debug -Db_sanitize=address +CCACHE_DISABLE=1 meson compile -C build-asan +ASAN_OPTIONS=abort_on_error=1:detect_leaks=0 ./build-asan/cell --dev bench --native fibonacci +``` + +This catches bugs crossing the boundary between generated dylibs and runtime helpers. + +If stale native artifacts are suspected after compiler/runtime changes, clear build outputs first: + +```bash +cell --dev clean shop --build +``` + ## Fuzz Testing The fuzzer generates random self-checking programs, compiles them, and runs them through both optimized and unoptimized paths. Each generated program contains test functions that validate their own expected results, so failures catch both correctness bugs and optimizer mismatches. 
diff --git a/fold.cm b/fold.cm index 627060bd..a686c844 100644 --- a/fold.cm +++ b/fold.cm @@ -458,7 +458,7 @@ var fold = function(ast) { else if (k == "-") result = lv - rv else if (k == "*") result = lv * rv else if (k == "/") result = lv / rv - else if (k == "%") result = lv % rv + else if (k == "%") result = lv - (trunc(lv / rv) * rv) else if (k == "**") result = lv ** rv if (result == null) return make_null(expr) return make_number(result, expr) diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm index cd6f16b7..d2fb6590 100644 --- a/internal/bootstrap.cm +++ b/internal/bootstrap.cm @@ -11,7 +11,9 @@ var json_mod = use_embed('json') var crypto = use_embed('crypto') function content_hash(content) { - return text(crypto.blake2(content), 'h') + var data = content + if (!is_blob(data)) data = stone(blob(text(data))) + return text(crypto.blake2(data), 'h') } function cache_path(hash) { diff --git a/internal/engine.cm b/internal/engine.cm index bb8c7057..3f82a84e 100644 --- a/internal/engine.cm +++ b/internal/engine.cm @@ -35,7 +35,9 @@ var packages_path = shop_path ? shop_path + '/packages' : null var use_cache = {} function content_hash(content) { - return text(crypto.blake2(content), 'h') + var data = content + if (!is_blob(data)) data = stone(blob(text(data))) + return text(crypto.blake2(data), 'h') } function cache_path(hash) { diff --git a/internal/shop.cm b/internal/shop.cm index 4e5aba70..a47c30d4 100644 --- a/internal/shop.cm +++ b/internal/shop.cm @@ -449,7 +449,11 @@ function try_native_mod_dylib(pkg, stem) { var host = detect_host_target() if (!host) return null - var build_path = build_mod.cache_path(src + '\n' + host, build_mod.SALT_NATIVE) + var san_flags = build_mod.native_sanitize_flags ? build_mod.native_sanitize_flags() : '' + var native_key = build_mod.native_cache_content ? 
+ build_mod.native_cache_content(src, host, san_flags) : + (src + '\n' + host) + var build_path = build_mod.cache_path(native_key, build_mod.SALT_NATIVE) if (!fd.is_file(build_path)) return null log.shop('native dylib cache hit: ' + stem) @@ -862,7 +866,11 @@ function read_dylib_manifest(pkg) { // Ensure all C modules for a package are built and loaded. // Returns the array of {file, symbol, dylib} results, cached per package. function ensure_package_dylibs(pkg) { - if (package_dylibs[pkg]) return package_dylibs[pkg] + if (package_dylibs[pkg] != null) return package_dylibs[pkg] + if (pkg == 'core') { + package_dylibs[pkg] = [] + return [] + } var results = null var build_mod = use_cache['core/build'] @@ -888,6 +896,7 @@ function ensure_package_dylibs(pkg) { log.shop('loaded manifest for ' + pkg + ' (' + text(length(results)) + ' modules)') } + if (results == null) results = [] package_dylibs[pkg] = results // Preload all sibling dylibs with RTLD_LAZY|RTLD_GLOBAL @@ -1910,4 +1919,4 @@ Shop.use_native = function(path, package_context) { return os.native_module_load(handle, env) } -return Shop \ No newline at end of file +return Shop diff --git a/mcode.cm b/mcode.cm index 8a8362a3..dc37d395 100644 --- a/mcode.cm +++ b/mcode.cm @@ -4,7 +4,7 @@ var mcode = function(ast) { // Translation tables var binop_map = { "+": "add", "-": "subtract", "*": "multiply", "/": "divide", - "%": "modulo", "&": "bitand", "|": "bitor", "^": "bitxor", + "%": "remainder", "&": "bitand", "|": "bitor", "^": "bitxor", "<<": "shl", ">>": "shr", ">>>": "ushr", "==": "eq", "===": "eq", "!=": "ne", "!==": "ne", "<": "lt", "<=": "le", ">": "gt", ">=": "ge", @@ -24,13 +24,13 @@ var mcode = function(ast) { var binop_sym = { add: "+", subtract: "-", multiply: "*", divide: "/", - modulo: "%", pow: "**", + remainder: "%", pow: "**", lt: "<", le: "<=", gt: ">", ge: ">=" } var compound_map = { "+=": "add", "-=": "subtract", "*=": "multiply", "/=": "divide", - "%=": "modulo", "&=": "bitand", "|=": "bitor", 
"^=": "bitxor", + "%=": "remainder", "&=": "bitand", "|=": "bitor", "^=": "bitxor", "<<=": "shl", ">>=": "shr", ">>>=": "ushr" } @@ -41,6 +41,28 @@ var mcode = function(ast) { length: "length" } + // Numeric intrinsic lowering maps (Tier 1 direct mcode). + var intrinsic_num_unary_ops = { + abs: "abs", + sign: "sign", + fraction: "fraction", + integer: "integer", + whole: "integer", + neg: "negate" + } + var intrinsic_num_binary_ops = { + modulo: "modulo", + remainder: "remainder", + max: "max", + min: "min" + } + var intrinsic_num_place_ops = { + floor: "floor", + ceiling: "ceiling", + round: "round", + trunc: "trunc" + } + // Compiler state var s_instructions = null var s_data = null @@ -651,7 +673,8 @@ var mcode = function(ast) { if (rel != null) { emit_relational(rel[0], rel[1], rel[2]) } else if (op_str == "subtract" || op_str == "multiply" || - op_str == "divide" || op_str == "modulo" || op_str == "pow") { + op_str == "divide" || op_str == "modulo" || op_str == "remainder" || + op_str == "pow") { emit_numeric_binop(op_str) } else { // Passthrough for bitwise, in, etc. @@ -877,6 +900,56 @@ var mcode = function(ast) { } } + // Intrinsic numeric helpers: + // preserve native intrinsic behavior for bad argument types by returning null. 
+ var emit_intrinsic_num_unary = function(op, arg_slot) { + var dest = alloc_slot() + var t = alloc_slot() + var bad = gen_label(op + "_arg_bad") + var done = gen_label(op + "_arg_done") + emit_2("is_num", t, arg_slot) + emit_jump_cond("jump_false", t, bad) + emit_2(op, dest, arg_slot) + emit_jump(done) + emit_label(bad) + emit_1("null", dest) + emit_label(done) + return dest + } + + var emit_intrinsic_num_binary = function(op, left_slot, right_slot) { + var dest = alloc_slot() + var t0 = alloc_slot() + var t1 = alloc_slot() + var bad = gen_label(op + "_arg_bad") + var done = gen_label(op + "_arg_done") + emit_2("is_num", t0, left_slot) + emit_jump_cond("jump_false", t0, bad) + emit_2("is_num", t1, right_slot) + emit_jump_cond("jump_false", t1, bad) + emit_3(op, dest, left_slot, right_slot) + emit_jump(done) + emit_label(bad) + emit_1("null", dest) + emit_label(done) + return dest + } + + var emit_intrinsic_num_place = function(op, value_slot, place_slot) { + var dest = alloc_slot() + var t = alloc_slot() + var bad = gen_label(op + "_arg_bad") + var done = gen_label(op + "_arg_done") + emit_2("is_num", t, value_slot) + emit_jump_cond("jump_false", t, bad) + emit_3(op, dest, value_slot, place_slot) + emit_jump(done) + emit_label(bad) + emit_1("null", dest) + emit_label(done) + return dest + } + // Scan scope record for variable declarations var scan_scope = function() { var scope = find_scope_record(s_function_nr) @@ -1796,6 +1869,28 @@ var mcode = function(ast) { if (callee_kind == "name" && callee.intrinsic == true) { fname = callee.name nargs = args_list != null ? 
length(args_list) : 0 + mop = intrinsic_num_unary_ops[fname] + if (mop != null && nargs == 1) { + a0 = gen_expr(args_list[0], -1) + return emit_intrinsic_num_unary(mop, a0) + } + mop = intrinsic_num_binary_ops[fname] + if (mop != null && nargs == 2) { + a0 = gen_expr(args_list[0], -1) + a1 = gen_expr(args_list[1], -1) + return emit_intrinsic_num_binary(mop, a0, a1) + } + mop = intrinsic_num_place_ops[fname] + if (mop != null && (nargs == 1 || nargs == 2)) { + a0 = gen_expr(args_list[0], -1) + if (nargs == 2) { + a1 = gen_expr(args_list[1], -1) + } else { + a1 = alloc_slot() + emit_1("null", a1) + } + return emit_intrinsic_num_place(mop, a0, a1) + } // 1-arg type check intrinsics → direct opcode if (nargs == 1 && sensory_ops[fname] != null) { a0 = gen_expr(args_list[0], -1) diff --git a/meson.build b/meson.build index 17b96f7a..28ea6cc9 100644 --- a/meson.build +++ b/meson.build @@ -38,8 +38,7 @@ if host_machine.system() == 'darwin' foreach fkit : fworks deps += dependency('appleframeworks', modules: fkit) endforeach - # 32MB stack for deep native recursion (CPS patterns without TCO) - link += ['-Wl,-stack_size,0x2000000'] + # Native code uses dispatch loop (no C stack recursion) endif if host_machine.system() == 'playdate' diff --git a/qbe.cm b/qbe.cm index 2424b566..25e1e53c 100644 --- a/qbe.cm +++ b/qbe.cm @@ -11,7 +11,7 @@ def js_null = 7 def js_false = 3 def js_true = 35 def js_exception = 15 -def js_empty_text = 27 +def js_empty_text = 11 // Shared closure vars for functions with >4 params var _qop = null @@ -67,13 +67,13 @@ var is_ptr = function(p, v) { var is_imm_text = function(p, v) { return ` %${p}.t =l and ${v}, 31 - %${p} =w ceql %${p}.t, 27 + %${p} =w ceql %${p}.t, 11 ` } var is_text = function(p, v) { return ` %${p}.imm =l and ${v}, 31 - %${p}.is_imm =w ceql %${p}.imm, 27 + %${p}.is_imm =w ceql %${p}.imm, 11 jnz %${p}.is_imm, @${p}.yes, @${p}.chk_ptr @${p}.chk_ptr %${p}.ptag =l and ${v}, 7 diff --git a/qbe_emit.cm b/qbe_emit.cm index 
c0f24deb..314ad593 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -182,10 +182,6 @@ ${sw("w", "%fp", "%dest", "%r")} // Type checks via C (no ctx needed except is_proxy) var tc_ops = [ - ["is_text", "JS_IsText", false], - ["is_array", "JS_IsArray", false], - ["is_func", "JS_IsFunction", false], - ["is_record", "JS_IsRecord", false], ["is_stone", "JS_IsStone", false], ["is_proxy", "cell_rt_is_proxy", true] ] @@ -223,10 +219,141 @@ ${sw("w", "%fp", "%dest", "%r")} i = i + 1 } - // Float comparisons: call qbe_float_cmp(ctx, op_id, a, b) → w, tag + // is_text: immediate text OR ptr->header type check (OBJ_TEXT=2), chase forwards + h[] = `export function $__is_text_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %imm =l and %a, 31 + %is_imm =w ceql %imm, 11 + jnz %is_imm, @yes, @chk_ptr +@chk_ptr + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 2 + jmp @pack +@yes + %cr =w copy 1 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // is_record: pointer + header type check (OBJ_RECORD=3), chase forwards + h[] = `export function $__is_record_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 3 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // is_array: inline pointer+header check (OBJ_ARRAY=0), chase forwards + h[] = `export 
function $__is_array_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 0 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // is_func: inline pointer+header check (OBJ_FUNCTION=4), chase forwards + h[] = `export function $__is_func_ss(l %fp, l %dest, l %src) { +@entry +${sr("a", "%src")} + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr, @no +@ptr + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk + %cr =w ceql %ht, 4 + jmp @pack +@no + %cr =w copy 0 +@pack + %crext =l extuw %cr + %sh =l shl %crext, 5 + %r =l or %sh, 3 +${sw("w", "%fp", "%dest", "%r")} + ret +}` + + // Float comparisons: decode short-float/int inline, then compare in QBE. 
var fc_ops = [ - ["eq_float", 0], ["ne_float", 1], ["lt_float", 2], - ["le_float", 3], ["gt_float", 4], ["ge_float", 5] + ["eq_float", "ceqd"], ["ne_float", "cned"], ["lt_float", "cltd"], + ["le_float", "cled"], ["gt_float", "cgtd"], ["ge_float", "cged"] ] i = 0 while (i < length(fc_ops)) { @@ -234,7 +361,63 @@ ${sw("w", "%fp", "%dest", "%r")} @entry ${sr("a", "%s1")} ${sr("b", "%s2")} - %cr =w call $qbe_float_cmp(l %ctx, w ${fc_ops[i][1]}, l %a, l %b) + %a_tag =l and %a, 1 + %a_is_int =w ceql %a_tag, 0 + jnz %a_is_int, @a_int, @a_float +@a_int + %a_isl =l sar %a, 1 + %a_iw =w copy %a_isl + %ad =d swtof %a_iw + jmp @a_done +@a_float + %a_sexp =l shr %a, 55 + %a_sexp =l and %a_sexp, 255 + %a_is_zero =w ceql %a_sexp, 0 + jnz %a_is_zero, @a_zero, @a_decode +@a_zero + %ad =d copy d_0.0 + jmp @a_done +@a_decode + %a_sign =l shr %a, 63 + %a_mant =l shr %a, 3 + %a_mant =l and %a_mant, 4503599627370495 + %a_dexp =l sub %a_sexp, 127 + %a_dexp =l add %a_dexp, 1023 + %a_s63 =l shl %a_sign, 63 + %a_e52 =l shl %a_dexp, 52 + %a_bits =l or %a_s63, %a_e52 + %a_bits =l or %a_bits, %a_mant + %ad =d cast %a_bits +@a_done + %b_tag =l and %b, 1 + %b_is_int =w ceql %b_tag, 0 + jnz %b_is_int, @b_int, @b_float +@b_int + %b_isl =l sar %b, 1 + %b_iw =w copy %b_isl + %bd =d swtof %b_iw + jmp @b_done +@b_float + %b_sexp =l shr %b, 55 + %b_sexp =l and %b_sexp, 255 + %b_is_zero =w ceql %b_sexp, 0 + jnz %b_is_zero, @b_zero, @b_decode +@b_zero + %bd =d copy d_0.0 + jmp @b_done +@b_decode + %b_sign =l shr %b, 63 + %b_mant =l shr %b, 3 + %b_mant =l and %b_mant, 4503599627370495 + %b_dexp =l sub %b_sexp, 127 + %b_dexp =l add %b_dexp, 1023 + %b_s63 =l shl %b_sign, 63 + %b_e52 =l shl %b_dexp, 52 + %b_bits =l or %b_s63, %b_e52 + %b_bits =l or %b_bits, %b_mant + %bd =d cast %b_bits +@b_done + %cr =w ${fc_ops[i][1]} %ad, %bd %crext =l extuw %cr %sh =l shl %crext, 5 %r =l or %sh, 3 @@ -296,12 +479,72 @@ ${sw("w", "%fp", "%dest", "%r")} i = i + 1 } - // not: JS_ToBool + negate + tag + // not: inline 
truthiness (no JS_ToBool call) h[] = `export function $__not_ss(l %ctx, l %fp, l %dest, l %src) { @entry ${sr("a", "%src")} - %bval =w call $JS_ToBool(l %ctx, l %a) - %neg =w ceqw %bval, 0 + %t5 =l and %a, 31 + %is_bool =w ceql %t5, 3 + jnz %is_bool, @bool, @chk_null +@bool + %truthy =w cnel %a, 3 + jmp @truthy_done +@chk_null + %is_null =w ceql %t5, 7 + jnz %is_null, @falsey, @chk_int +@chk_int + %t1 =l and %a, 1 + %is_int =w ceql %t1, 0 + jnz %is_int, @int_path, @chk_imm_text +@int_path + %truthy =w cnel %a, 0 + jmp @truthy_done +@chk_imm_text + %is_imm_text =w ceql %t5, 11 + jnz %is_imm_text, @imm_text, @chk_ptr +@imm_text + %truthy =w cnel %a, 11 + jmp @truthy_done +@chk_ptr + %ptag =l and %a, 7 + %is_ptr =w ceql %ptag, 1 + jnz %is_ptr, @ptr_path, @chk_sfloat +@chk_sfloat + %is_sfloat =w ceql %ptag, 5 + jnz %is_sfloat, @sfloat_path, @other_imm +@sfloat_path + %sexp =l shr %a, 55 + %sexp =l and %sexp, 255 + %truthy =w cnel %sexp, 0 + jmp @truthy_done +@other_imm + %truthy =w copy 1 + jmp @truthy_done +@ptr_path + %ptr =l and %a, -8 + %hdr =l loadl %ptr +@chase + %ht =l and %hdr, 7 + %is_fwd =w ceql %ht, 7 + jnz %is_fwd, @follow, @chk_text_ptr +@follow + %ptr =l shr %hdr, 3 + %hdr =l loadl %ptr + jmp @chase +@chk_text_ptr + %is_text_ptr =w ceql %ht, 2 + jnz %is_text_ptr, @text_ptr, @ptr_truthy +@text_ptr + %len =l shr %hdr, 8 + %truthy =w cnel %len, 0 + jmp @truthy_done +@ptr_truthy + %truthy =w copy 1 + jmp @truthy_done +@falsey + %truthy =w copy 0 +@truthy_done + %neg =w ceqw %truthy, 0 %nex =l extuw %neg %sh =l shl %nex, 5 %r =l or %sh, 3 @@ -332,67 +575,214 @@ ${sw("w", "%fp", "%dest", "%r")} h[] = `export function $__bnot_ss(l %ctx, l %fp, l %dest, l %src) { @entry ${sr("a", "%src")} - %r =l call $qbe_bnot(l %ctx, l %a) + %tag =l and %a, 1 + %is_int =w ceql %tag, 0 + jnz %is_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %aiw =w copy %ai + %rw =w xor %aiw, -1 + %rl =l extsw %rw + %r =l shl %rl, 1 ${sw("w", "%fp", "%dest", "%r")} ret +@bad + call $cell_rt_disrupt(l 
%ctx) + ret }` - // Bitwise binary ops - var bw_ops = [ - ["band", "qbe_bitwise_and"], ["bor", "qbe_bitwise_or"], - ["bxor", "qbe_bitwise_xor"], ["bshl", "qbe_shift_shl"], - ["bshr", "qbe_shift_sar"], ["bushr", "qbe_shift_shr"] - ] - i = 0 - while (i < length(bw_ops)) { - h[] = `export function $__${bw_ops[i][0]}_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { + // Bitwise binary ops (int-only; type checks should be inserted upstream) + h[] = `export function $__band_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { @entry ${sr("a", "%s1")} ${sr("b", "%s2")} - %r =l call $${bw_ops[i][1]}(l %ctx, l %a, l %b) + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %rw =w and %aiw, %biw + %rl =l extsw %rw + %r =l shl %rl, 1 ${sw("w", "%fp", "%dest", "%r")} ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bor_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %rw =w or %aiw, %biw + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bxor_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %rw =w xor %aiw, %biw + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l 
%ctx) + ret +}` + + h[] = `export function $__bshl_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %sh =w and %biw, 31 + %rw =w shl %aiw, %sh + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bshr_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %sh =w and %biw, 31 + %rw =w sar %aiw, %sh + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret +}` + + h[] = `export function $__bushr_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { +@entry +${sr("a", "%s1")} +${sr("b", "%s2")} + %a_tag =l and %a, 1 + %b_tag =l and %b, 1 + %a_int =w ceql %a_tag, 0 + %b_int =w ceql %b_tag, 0 + %both_int =w and %a_int, %b_int + jnz %both_int, @ok, @bad +@ok + %ai =l sar %a, 1 + %bi =l sar %b, 1 + %aiw =w copy %ai + %biw =w copy %bi + %sh =w and %biw, 31 + %rw =w shr %aiw, %sh + %rl =l extsw %rw + %r =l shl %rl, 1 +${sw("w", "%fp", "%dest", "%r")} + ret +@bad + call $cell_rt_disrupt(l %ctx) + ret }` - i = i + 1 - } // ============================================================ // Category C: Allocating helpers (return fp or 0) // ============================================================ - // Allocating binary ops: read 2 slots, call C, refresh, write dest - var ab_ops = [ - ["add", "cell_rt_add"], ["sub", "qbe_float_sub"], - ["mul", "qbe_float_mul"], ["div", 
"qbe_float_div"], - ["mod", "qbe_float_mod"], ["pow", "qbe_float_pow"], - ["concat", "JS_ConcatString"] - ] - i = 0 - while (i < length(ab_ops)) { - h[] = `export function l $__${ab_ops[i][0]}_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { + // concat allocates; keep refresh path + h[] = `export function l $__concat_ss(l %ctx, l %fp, l %dest, l %s1, l %s2) { @entry ${sr("a", "%s1")} ${sr("b", "%s2")} - %r =l call $${ab_ops[i][1]}(l %ctx, l %a, l %b) + %r =l call $JS_ConcatString(l %ctx, l %a, l %b) ${alloc_tail("%r")} }` - i = i + 1 - } - // Allocating unary: negate - h[] = `export function l $__neg_ss(l %ctx, l %fp, l %dest, l %src) { + // access_lit(ctx, fp, dest, lit_idx) + h[] = `export function l $__access_lit_ss(l %ctx, l %fp, l %dest, l %lit_idx) { @entry -${sr("a", "%src")} - %r =l call $qbe_float_neg(l %ctx, l %a) -${alloc_tail("%r")} + %r =l call $cell_rt_access_lit(l %ctx, l %lit_idx) + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + ret %fp +@exc + ret 0 }` - // Property access: load_field(ctx, fp, dest, obj_slot, name_ptr) - h[] = `export function l $__load_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %name) { + // Property access: load_field(ctx, fp, dest, obj_slot, lit_idx) + h[] = `export function l $__load_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %lit_idx) { @entry ${sr("a", "%obj_slot")} - %r =l call $cell_rt_load_field(l %ctx, l %a, l %name) -${alloc_tail("%r")} + %r =l call $cell_rt_load_field_lit(l %ctx, l %a, l %lit_idx) + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + ret %fp +@exc + ret 0 }` // load_dynamic(ctx, fp, dest, obj_slot, key_slot) @@ -401,7 +791,13 @@ ${alloc_tail("%r")} ${sr("a", "%obj_slot")} ${sr("b", "%key_slot")} %r =l call $cell_rt_load_dynamic(l %ctx, l %a, l %b) -${alloc_tail("%r")} + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + ret %fp +@exc + ret 0 }` // load_index(ctx, fp, dest, arr_slot, 
idx_slot) @@ -410,16 +806,26 @@ ${alloc_tail("%r")} ${sr("a", "%arr_slot")} ${sr("b", "%idx_slot")} %r =l call $cell_rt_load_index(l %ctx, l %a, l %b) -${alloc_tail("%r")} + %is_exc =w ceql %r, 15 + jnz %is_exc, @exc, @ok +@ok +${sw("w", "%fp", "%dest", "%r")} + ret %fp +@exc + ret 0 }` - // store_field(ctx, fp, obj_slot, val_slot, name_ptr) — no dest write - h[] = `export function l $__store_field_ss(l %ctx, l %fp, l %obj_slot, l %val_slot, l %name) { + // store_field(ctx, fp, obj_slot, val_slot, lit_idx) — no dest write + h[] = `export function l $__store_field_ss(l %ctx, l %fp, l %obj_slot, l %val_slot, l %lit_idx) { @entry ${sr("a", "%obj_slot")} ${sr("b", "%val_slot")} - call $cell_rt_store_field(l %ctx, l %b, l %a, l %name) -${alloc_tail_nw()} + %ok =w call $cell_rt_store_field_lit(l %ctx, l %b, l %a, l %lit_idx) + jnz %ok, @ok, @exc +@ok + ret %fp +@exc + ret 0 }` // store_dynamic(ctx, fp, obj_slot, val_slot, key_slot) — no dest write @@ -428,8 +834,12 @@ ${alloc_tail_nw()} ${sr("a", "%obj_slot")} ${sr("b", "%val_slot")} ${sr("c", "%key_slot")} - call $cell_rt_store_dynamic(l %ctx, l %b, l %a, l %c) -${alloc_tail_nw()} + %ok =w call $cell_rt_store_dynamic(l %ctx, l %b, l %a, l %c) + jnz %ok, @ok, @exc +@ok + ret %fp +@exc + ret 0 }` // store_index(ctx, fp, obj_slot, val_slot, idx_slot) — no dest write @@ -438,8 +848,12 @@ ${alloc_tail_nw()} ${sr("a", "%obj_slot")} ${sr("b", "%val_slot")} ${sr("c", "%idx_slot")} - call $cell_rt_store_index(l %ctx, l %b, l %a, l %c) -${alloc_tail_nw()} + %ok =w call $cell_rt_store_index(l %ctx, l %b, l %a, l %c) + jnz %ok, @ok, @exc +@ok + ret %fp +@exc + ret 0 }` // frame(ctx, fp, dest, fn_slot, nargs) @@ -475,10 +889,10 @@ ${sw("w", "%fp2", "%result_slot", "%r")} ret 0 }` - // function(ctx, fp, dest, fn_idx, arity) - h[] = `export function l $__function_ss(l %ctx, l %fp, l %dest, l %fn_idx, l %arity) { + // function(ctx, fp, dest, fn_idx, arity, nr_slots) + h[] = `export function l $__function_ss(l %ctx, l %fp, l %dest, l 
%fn_idx, l %arity, l %nr_slots) { @entry - %r =l call $cell_rt_make_function(l %ctx, l %fn_idx, l %fp, l %arity) + %r =l call $cell_rt_make_function(l %ctx, l %fn_idx, l %fp, l %arity, l %nr_slots) ${alloc_tail("%r")} }` @@ -507,13 +921,14 @@ ${alloc_tail("%r")} h[] = `export function l $__new_float64_ss(l %ctx, l %fp, l %dest, d %val) { @entry %r =l call $qbe_new_float64(l %ctx, d %val) -${alloc_tail("%r")} +${sw("w", "%fp", "%dest", "%r")} + ret %fp }` - // get_intrinsic(ctx, fp, dest, name_ptr) - h[] = `export function l $__get_intrinsic_ss(l %ctx, l %fp, l %dest, l %name_ptr) { + // get_intrinsic(ctx, fp, dest, lit_idx) + h[] = `export function l $__get_intrinsic_ss(l %ctx, l %fp, l %dest, l %lit_idx) { @entry - %r =l call $cell_rt_get_intrinsic(l %ctx, l %name_ptr) + %r =l call $cell_rt_get_intrinsic_lit(l %ctx, l %lit_idx) ${alloc_tail("%r")} }` @@ -548,11 +963,11 @@ ${sr("a", "%src")} ${alloc_tail("%r")} }` - // delete_field(ctx, fp, dest, obj_slot, name_ptr) - h[] = `export function l $__delete_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %name) { + // delete_field(ctx, fp, dest, obj_slot, lit_idx) + h[] = `export function l $__delete_field_ss(l %ctx, l %fp, l %dest, l %obj_slot, l %lit_idx) { @entry ${sr("a", "%obj_slot")} - %r =l call $cell_rt_delete_str(l %ctx, l %a, l %name) + %r =l call $cell_rt_delete_lit(l %ctx, l %a, l %lit_idx) ${alloc_tail("%r")} }` @@ -588,8 +1003,11 @@ var qbe_emit = function(ir, qbe, export_name) { var out = [] var data_out = [] var str_table = {} + var str_entries = [] var str_id = 0 var uid = 0 + var lit_data = null + var si = 0 // ============================================================ // Output helpers @@ -631,8 +1049,10 @@ var qbe_emit = function(ir, qbe, export_name) { escaped = replace(escaped, "\t", "\\t") var line = "data " + label + ' = ' + '{ b "' + escaped + '", b 0 }' push(data_out, line) - str_table[val] = label - return label + var entry = { label: label, idx: length(str_entries) } + push(str_entries, 
entry) + str_table[val] = entry + return entry } // ============================================================ @@ -670,6 +1090,7 @@ var qbe_emit = function(ir, qbe, export_name) { var ei = 0 var elem_slot = 0 var v = null + var rv = null var lhs = null var rhs = null var obj = null @@ -680,11 +1101,164 @@ var qbe_emit = function(ir, qbe, export_name) { var tol = null var fn_arity = 0 var arity_tmp = null + var fn_nr_slots = 0 + var invoke_count = 0 + var si = 0 + var scan = null + var scan_op = null + var label_pos = {} + var instr_idx = 0 + var has_invokes = false + var seg_counter = 0 + var ri = 0 + var seg_num = 0 + var resume_val = 0 + // Native calls should mirror MACH semantics: function calls are mediated + // by the frame dispatcher, not recursive C calls. + var use_invoke_trampoline = true + var j_lbl = null + var j_idx = null + var jt_lbl = null + var jt_idx = null + var jt_backedge = false + var jf_lbl = null + var jf_idx = null + var jf_backedge = false + var jn_lbl = null + var jn_idx = null + var jn_backedge = false + var jnn_lbl = null + var jnn_idx = null + var jnn_backedge = false + var truthy = null + var lhs_d = null + var rhs_d = null + var peek1 = null + var peek2 = null + var peek3 = null + var peek4 = null + var peek5 = null + var floor_frame_slot = 0 + var floor_this_slot = 0 + var floor_arg_slot = 0 + var floor_dest_slot = 0 + var text_frame_slot = 0 + var text_this_slot = 0 + var text_arg_slot = 0 + var text_dest_slot = 0 + + // Pre-scan: count invoke/tail_invoke points to assign segment numbers. + // Must skip dead code (instructions after terminators) the same way + // the main emission loop does, otherwise we create jump table entries + // for segments that never get emitted. + var scan_dead = false + si = 0 + while (si < length(instrs)) { + scan = instrs[si] + si = si + 1 + if (is_text(scan)) { + // Skip optimizer nop pseudo-labels entirely. 
+ if (starts_with(scan, "_nop_")) continue + label_pos[sanitize(scan)] = si - 1 + // Real labels reset dead code state. + scan_dead = false + continue + } + if (scan_dead) continue + if (!is_array(scan)) continue + scan_op = scan[0] + + // Keep invoke segment counting consistent with main-loop peephole: + // inline floor intrinsic call sequence does not emit an invoke. + if (false && scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "floor") { + if (si + 4 < length(instrs)) { + peek1 = instrs[si] + peek2 = instrs[si + 1] + peek3 = instrs[si + 2] + peek4 = instrs[si + 3] + peek5 = instrs[si + 4] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + floor_frame_slot = peek1[1] + floor_this_slot = peek2[1] + if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot && + peek4[1] == floor_frame_slot && peek4[2] == 1 && + peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) { + si = si + 5 + continue + } + } + } + } + + // Keep invoke segment counting consistent with main-loop peephole: + // inline text intrinsic call sequence does not emit an invoke. 
+ if (scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "text") { + if (si + 4 < length(instrs)) { + peek1 = instrs[si] + peek2 = instrs[si + 1] + peek3 = instrs[si + 2] + peek4 = instrs[si + 3] + peek5 = instrs[si + 4] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + text_frame_slot = peek1[1] + text_this_slot = peek2[1] + if (peek3[1] == text_frame_slot && peek3[2] == 0 && peek3[3] == text_this_slot && + peek4[1] == text_frame_slot && peek4[2] == 1 && + peek5[1] == text_frame_slot && peek5[2] == text_this_slot) { + si = si + 5 + continue + } + } + } + } + + if (use_invoke_trampoline && (scan_op == "invoke" || scan_op == "tail_invoke")) { + invoke_count = invoke_count + 1 + } + // Track terminators — same set as in the main loop + if (scan_op == "return" || scan_op == "jump" || scan_op == "goinvoke" || scan_op == "disrupt") { + scan_dead = true + } + } + has_invokes = use_invoke_trampoline && invoke_count > 0 // Function signature: (ctx, frame_ptr) → JSValue emit(`export function l $${name}(l %ctx, l %fp) {`) emit("@entry") + // Resume dispatch: if this function has invoke points, read the segment + // number from frame->address and jump to the right resume point. + // frame->address is at fp - 8 (last field before slots[]). 
+ if (has_invokes) { + emit(" %addr_ptr =l sub %fp, 8") + emit(" %addr_raw =l loadl %addr_ptr") + // address is stored as JS_NewInt32 tagged value: n << 1 + emit(" %addr =l sar %addr_raw, 1") + emit(" %resume =l shr %addr, 16") + emit(` jnz %resume, @_rcheck1, @_seg0`) + ri = 1 + while (ri <= invoke_count) { + emit(`@_rcheck${text(ri)}`) + emit(` %_rc${text(ri)} =w ceql %resume, ${text(ri)}`) + if (ri < invoke_count) { + emit(` jnz %_rc${text(ri)}, @_seg${text(ri)}, @_rcheck${text(ri + 1)}`) + } else { + // Last check — if no match, fall through to seg0 + emit(` jnz %_rc${text(ri)}, @_seg${text(ri)}, @_seg0`) + } + ri = ri + 1 + } + emit("@_seg0") + } + // GC-safe slot access: every read/write goes through frame memory. // %fp may become stale after GC-triggering calls — use refresh_fp(). var s_read = function(slot) { @@ -732,11 +1306,180 @@ var qbe_emit = function(ir, qbe, export_name) { emit(`@${lbl}_ok`) } + // Poll pause/interrupt state on taken backward jumps. + var emit_backedge_branch = function(target_label) { + var chk_lbl = fresh() + emit(` %${chk_lbl} =w call $cell_rt_check_backedge(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jnz %${chk_lbl}, @disruption_handler, @${target_label}`) + } else { + needs_exc_ret = true + emit(` jnz %${chk_lbl}, @_exc_ret, @${target_label}`) + } + } + + // Inline JS_ToBool equivalent for hot branch paths. + // Returns a `%name` holding w 0/1 truthiness. 
+ var emit_truthy_w = function(val) { + var tp = fresh() + emit(` %${tp}_t5 =l and ${val}, 31`) + emit(` %${tp}_is_bool =w ceql %${tp}_t5, 3`) + emit(` jnz %${tp}_is_bool, @${tp}_bool, @${tp}_chk_null`) + emit(`@${tp}_bool`) + emit(` %${tp}_truthy =w cnel ${val}, 3`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_chk_null`) + emit(` %${tp}_is_null =w ceql %${tp}_t5, 7`) + emit(` jnz %${tp}_is_null, @${tp}_falsey, @${tp}_chk_int`) + emit(`@${tp}_chk_int`) + emit(` %${tp}_t1 =l and ${val}, 1`) + emit(` %${tp}_is_int =w ceql %${tp}_t1, 0`) + emit(` jnz %${tp}_is_int, @${tp}_int_path, @${tp}_chk_imm_text`) + emit(`@${tp}_int_path`) + emit(` %${tp}_truthy =w cnel ${val}, 0`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_chk_imm_text`) + emit(` %${tp}_is_imm_text =w ceql %${tp}_t5, 11`) + emit(` jnz %${tp}_is_imm_text, @${tp}_imm_text, @${tp}_chk_ptr`) + emit(`@${tp}_imm_text`) + emit(` %${tp}_truthy =w cnel ${val}, 11`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_chk_ptr`) + emit(` %${tp}_ptag =l and ${val}, 7`) + emit(` %${tp}_is_ptr =w ceql %${tp}_ptag, 1`) + emit(` jnz %${tp}_is_ptr, @${tp}_ptr_path, @${tp}_chk_sfloat`) + emit(`@${tp}_chk_sfloat`) + emit(` %${tp}_is_sfloat =w ceql %${tp}_ptag, 5`) + emit(` jnz %${tp}_is_sfloat, @${tp}_sfloat_path, @${tp}_other_imm`) + emit(`@${tp}_sfloat_path`) + emit(` %${tp}_sexp =l shr ${val}, 55`) + emit(` %${tp}_sexp =l and %${tp}_sexp, 255`) + emit(` %${tp}_truthy =w cnel %${tp}_sexp, 0`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_other_imm`) + emit(` %${tp}_truthy =w copy 1`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_ptr_path`) + emit(` %${tp}_ptr =l and ${val}, -8`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(`@${tp}_chase`) + emit(` %${tp}_ht =l and %${tp}_hdr, 7`) + emit(` %${tp}_is_fwd =w ceql %${tp}_ht, 7`) + emit(` jnz %${tp}_is_fwd, @${tp}_follow, @${tp}_chk_text_ptr`) + emit(`@${tp}_follow`) + emit(` %${tp}_ptr =l shr %${tp}_hdr, 3`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(` jmp @${tp}_chase`) + 
emit(`@${tp}_chk_text_ptr`) + emit(` %${tp}_is_text_ptr =w ceql %${tp}_ht, 2`) + emit(` jnz %${tp}_is_text_ptr, @${tp}_text_ptr, @${tp}_ptr_truthy`) + emit(`@${tp}_text_ptr`) + emit(` %${tp}_len =l shr %${tp}_hdr, 8`) + emit(` %${tp}_truthy =w cnel %${tp}_len, 0`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_ptr_truthy`) + emit(` %${tp}_truthy =w copy 1`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_falsey`) + emit(` %${tp}_truthy =w copy 0`) + emit(`@${tp}_done`) + return `%${tp}_truthy` + } + + // Returns w 0/1 for JS text (immediate or heap), following forwards. + var emit_is_text_w = function(val) { + var tp = fresh() + emit(` %${tp}_imm =l and ${val}, 31`) + emit(` %${tp}_is_imm =w ceql %${tp}_imm, 11`) + emit(` jnz %${tp}_is_imm, @${tp}_yes, @${tp}_chk_ptr`) + emit(`@${tp}_chk_ptr`) + emit(` %${tp}_ptag =l and ${val}, 7`) + emit(` %${tp}_is_ptr =w ceql %${tp}_ptag, 1`) + emit(` jnz %${tp}_is_ptr, @${tp}_ptr, @${tp}_no`) + emit(`@${tp}_ptr`) + emit(` %${tp}_ptr =l and ${val}, -8`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(`@${tp}_chase`) + emit(` %${tp}_ht =l and %${tp}_hdr, 7`) + emit(` %${tp}_is_fwd =w ceql %${tp}_ht, 7`) + emit(` jnz %${tp}_is_fwd, @${tp}_follow, @${tp}_chk`) + emit(`@${tp}_follow`) + emit(` %${tp}_ptr =l shr %${tp}_hdr, 3`) + emit(` %${tp}_hdr =l loadl %${tp}_ptr`) + emit(` jmp @${tp}_chase`) + emit(`@${tp}_chk`) + emit(` %${tp}_is_text =w ceql %${tp}_ht, 2`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_yes`) + emit(` %${tp}_is_text =w copy 1`) + emit(` jmp @${tp}_done`) + emit(`@${tp}_no`) + emit(` %${tp}_is_text =w copy 0`) + emit(`@${tp}_done`) + return `%${tp}_is_text` + } + + // Returns w 0/1 for JS numbers (int or short-float). 
+ var emit_is_num_w = function(val) { + var np = fresh() + emit(` %${np}_t1 =l and ${val}, 1`) + emit(` %${np}_ii =w ceql %${np}_t1, 0`) + emit(` %${np}_t2 =l and ${val}, 7`) + emit(` %${np}_fi =w ceql %${np}_t2, 5`) + emit(` %${np}_is_num =w or %${np}_ii, %${np}_fi`) + return `%${np}_is_num` + } + + // Pack w 0/1 into tagged JS bool (JS_FALSE/JS_TRUE). + var emit_pack_bool_js = function(wv) { + var bp = fresh() + emit(` %${bp}_ext =l extuw ${wv}`) + emit(` %${bp}_sh =l shl %${bp}_ext, 5`) + emit(` %${bp}_js =l or %${bp}_sh, 3`) + return `%${bp}_js` + } + + // Convert a known numeric JSValue (int or short-float) to QBE double. + // Type checks happen earlier in mcode/streamline. + var emit_num_to_double = function(val) { + var np = fresh() + emit(` %${np}_tag =l and ${val}, 1`) + emit(` %${np}_is_int =w ceql %${np}_tag, 0`) + emit(` jnz %${np}_is_int, @${np}_int, @${np}_float`) + emit(`@${np}_int`) + emit(` %${np}_isl =l sar ${val}, 1`) + emit(` %${np}_iw =w copy %${np}_isl`) + emit(` %${np}_d =d swtof %${np}_iw`) + emit(` jmp @${np}_done`) + emit(`@${np}_float`) + emit(` %${np}_sexp =l shr ${val}, 55`) + emit(` %${np}_sexp =l and %${np}_sexp, 255`) + emit(` %${np}_is_zero =w ceql %${np}_sexp, 0`) + emit(` jnz %${np}_is_zero, @${np}_fzero, @${np}_fdecode`) + emit(`@${np}_fzero`) + emit(` %${np}_d =d copy d_0.0`) + emit(` jmp @${np}_done`) + emit(`@${np}_fdecode`) + emit(` %${np}_sign =l shr ${val}, 63`) + emit(` %${np}_mant =l shr ${val}, 3`) + emit(` %${np}_mant =l and %${np}_mant, 4503599627370495`) + emit(` %${np}_dexp =l sub %${np}_sexp, 127`) + emit(` %${np}_dexp =l add %${np}_dexp, 1023`) + emit(` %${np}_s63 =l shl %${np}_sign, 63`) + emit(` %${np}_e52 =l shl %${np}_dexp, 52`) + emit(` %${np}_bits =l or %${np}_s63, %${np}_e52`) + emit(` %${np}_bits =l or %${np}_bits, %${np}_mant`) + emit(` %${np}_d =d cast %${np}_bits`) + emit(`@${np}_done`) + return `%${np}_d` + } + // Walk instructions var last_was_term = false i = 0 while (i < length(instrs)) { instr = 
instrs[i] + instr_idx = i // Emit @disruption_handler at the right flat index // disruption_pc counts all entries (labels + instructions) @@ -754,7 +1497,7 @@ var qbe_emit = function(ir, qbe, export_name) { // Labels are plain strings; skip nop pseudo-labels from streamline if (is_text(instr)) { - if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue + if (starts_with(instr, "_nop_")) continue lbl = sanitize(instr) if (!last_was_term) { emit(` jmp @${lbl}`) @@ -773,6 +1516,80 @@ var qbe_emit = function(ir, qbe, export_name) { a3 = instr[3] last_was_term = false + // Peephole: inline `floor(x)` intrinsic call sequence + // access floor; frame; null this; setarg 0 this; setarg 1 x; invoke + if (false && op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "floor") { + if (instr_idx + 5 < length(instrs)) { + peek1 = instrs[instr_idx + 1] + peek2 = instrs[instr_idx + 2] + peek3 = instrs[instr_idx + 3] + peek4 = instrs[instr_idx + 4] + peek5 = instrs[instr_idx + 5] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + floor_frame_slot = peek1[1] + floor_this_slot = peek2[1] + if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot && + peek4[1] == floor_frame_slot && peek4[2] == 1 && + peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) { + floor_arg_slot = peek4[3] + floor_dest_slot = peek5[2] + v = s_read(floor_arg_slot) + p = fresh() + emit(` %${p}_is_num =w copy ${emit_is_num_w(v)}`) + emit(` jnz %${p}_is_num, @${p}_ok, @${p}_bad`) + emit(`@${p}_bad`) + s_write(floor_dest_slot, text(qbe.js_null)) + emit(` jmp @${p}_done`) + emit(`@${p}_ok`) + lhs_d = emit_num_to_double(v) + emit(` %${p}_fd =d call $floor(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_fd)`) + 
s_write(floor_dest_slot, `%${p}_r`) + emit(`@${p}_done`) + i = instr_idx + 6 + continue + } + } + } + } + + // Peephole: inline `text(x)` intrinsic call sequence + // access text; frame; null this; setarg 0 this; setarg 1 x; invoke + if (op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "text") { + if (instr_idx + 5 < length(instrs)) { + peek1 = instrs[instr_idx + 1] + peek2 = instrs[instr_idx + 2] + peek3 = instrs[instr_idx + 3] + peek4 = instrs[instr_idx + 4] + peek5 = instrs[instr_idx + 5] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + text_frame_slot = peek1[1] + text_this_slot = peek2[1] + if (peek3[1] == text_frame_slot && peek3[2] == 0 && peek3[3] == text_this_slot && + peek4[1] == text_frame_slot && peek4[2] == 1 && + peek5[1] == text_frame_slot && peek5[2] == text_this_slot) { + text_arg_slot = peek4[3] + text_dest_slot = peek5[2] + v = s_read(text_arg_slot) + p = fresh() + emit(` %${p}_r =l call $JS_CellText(l %ctx, l ${v})`) + refresh_fp() + s_write(text_dest_slot, `%${p}_r`) + i = instr_idx + 6 + continue + } + } + } + } + // --- Constants --- if (op == "int") { @@ -801,12 +1618,12 @@ var qbe_emit = function(ir, qbe, export_name) { } } else if (is_text(a2)) { sl = intern_str(a2) - emit(` %fp =l call $__new_string_ss(l %ctx, l %fp, l ${text(a1)}, l ${sl})`) + emit(` %fp =l call $__access_lit_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(sl.idx)})`) emit_exc_check() } else if (is_object(a2)) { if (a2.make == "intrinsic") { sl = intern_str(a2.name) - emit(` %fp =l call $__get_intrinsic_ss(l %ctx, l %fp, l ${text(a1)}, l ${sl})`) + emit(` %fp =l call $__get_intrinsic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(sl.idx)})`) emit_exc_check() } else if (a2.kind == "number") { if (a2.number != null && is_integer(a2.number)) { @@ 
-819,7 +1636,7 @@ var qbe_emit = function(ir, qbe, export_name) { } } else if (a2.kind == "text") { sl = intern_str(a2.value) - emit(` %fp =l call $__new_string_ss(l %ctx, l %fp, l ${text(a1)}, l ${sl})`) + emit(` %fp =l call $__access_lit_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(sl.idx)})`) emit_exc_check() } else if (a2.kind == "true") { s_write(a1, text(qbe.js_true)) @@ -839,46 +1656,353 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Movement --- if (op == "move") { - emit(` call $__move_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + s_write(a1, v) continue } // --- Generic arithmetic (VM dispatches int/float) --- if (op == "add") { - emit(` %fp =l call $__add_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_sum =l add %${p}_ai, %${p}_bi`) + emit(` %${p}_sumw =w copy %${p}_sum`) + emit(` %${p}_sumext =l extsw %${p}_sumw`) + emit(` %${p}_sum_ok =w ceql %${p}_sumext, %${p}_sum`) + emit(` jnz %${p}_sum_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_sum, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + emit(` # mixed add: numeric add, text concat, else disrupt`) + emit(` %${p}_a_num =w copy ${emit_is_num_w(lhs)}`) + emit(` %${p}_b_num =w copy ${emit_is_num_w(rhs)}`) + emit(` %${p}_both_num =w and %${p}_a_num, %${p}_b_num`) + emit(` jnz %${p}_both_num, @${p}_num_add, @${p}_chk_text`) + emit(`@${p}_num_add`) + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d add ${lhs_d}, 
${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(` jmp @${p}_done`) + emit(`@${p}_chk_text`) + emit(` %${p}_a_txt =w copy ${emit_is_text_w(lhs)}`) + emit(` %${p}_b_txt =w copy ${emit_is_text_w(rhs)}`) + emit(` %${p}_both_txt =w and %${p}_a_txt, %${p}_b_txt`) + emit(` jnz %${p}_both_txt, @${p}_txt_add, @${p}_bad`) + emit(`@${p}_txt_add`) + emit(` %fp =l call $__concat_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) emit_exc_check() + emit(` jmp @${p}_done`) + emit(`@${p}_bad`) + emit(` call $cell_rt_disrupt(l %ctx)`) + if (has_handler && !in_handler) { + emit(` jmp @disruption_handler`) + } else { + emit(` ret 15`) + } + emit(`@${p}_done`) continue } if (op == "subtract") { - emit(` %fp =l call $__sub_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_diff =l sub %${p}_ai, %${p}_bi`) + emit(` %${p}_diffw =w copy %${p}_diff`) + emit(` %${p}_diffext =l extsw %${p}_diffw`) + emit(` %${p}_diff_ok =w ceql %${p}_diffext, %${p}_diff`) + emit(` jnz %${p}_diff_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_diff, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d sub ${lhs_d}, ${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "multiply") { - emit(` %fp =l call $__mul_ss(l %ctx, l %fp, l 
${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_a_tag =l and ${lhs}, 1`) + emit(` %${p}_b_tag =l and ${rhs}, 1`) + emit(` %${p}_a_int =w ceql %${p}_a_tag, 0`) + emit(` %${p}_b_int =w ceql %${p}_b_tag, 0`) + emit(` %${p}_both_int =w and %${p}_a_int, %${p}_b_int`) + emit(` jnz %${p}_both_int, @${p}_int, @${p}_slow`) + emit(`@${p}_int`) + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_prod =l mul %${p}_ai, %${p}_bi`) + emit(` %${p}_prodw =w copy %${p}_prod`) + emit(` %${p}_prodext =l extsw %${p}_prodw`) + emit(` %${p}_prod_ok =w ceql %${p}_prodext, %${p}_prod`) + emit(` jnz %${p}_prod_ok, @${p}_int_store, @${p}_slow`) + emit(`@${p}_int_store`) + emit(` %${p}_tag =l shl %${p}_prod, 1`) + s_write(a1, `%${p}_tag`) + emit(` jmp @${p}_done`) + emit(`@${p}_slow`) + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d mul ${lhs_d}, ${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "divide") { - emit(` %fp =l call $__div_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d div ${lhs_d}, ${rhs_d}`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } if (op == "modulo") { - emit(` %fp =l call $__mod_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_lhs_nan =w cned ${lhs_d}, ${lhs_d}`) + emit(` %${p}_rhs_nan =w cned ${rhs_d}, ${rhs_d}`) + emit(` %${p}_has_nan =w or %${p}_lhs_nan, %${p}_rhs_nan`) + emit(` jnz %${p}_has_nan, @${p}_bad, @${p}_chk0`) + 
emit(`@${p}_chk0`) + emit(` %${p}_rhs0 =w ceqd ${rhs_d}, d_0.0`) + emit(` jnz %${p}_rhs0, @${p}_bad, @${p}_calc`) + emit(`@${p}_calc`) + emit(` %${p}_q =d div ${lhs_d}, ${rhs_d}`) + emit(` %${p}_qf =d call $floor(d %${p}_q)`) + emit(` %${p}_m =d mul ${rhs_d}, %${p}_qf`) + emit(` %${p}_rd =d sub ${lhs_d}, %${p}_m`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(` jmp @${p}_done`) + emit(`@${p}_bad`) + s_write(a1, text(qbe.js_null)) + emit(`@${p}_done`) + continue + } + if (op == "remainder") { + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rhs0 =w ceqd ${rhs_d}, d_0.0`) + emit(` jnz %${p}_rhs0, @${p}_bad, @${p}_calc`) + emit(`@${p}_calc`) + emit(` %${p}_q =d div ${lhs_d}, ${rhs_d}`) + emit(` %${p}_qt =d call $trunc(d %${p}_q)`) + emit(` %${p}_m =d mul ${rhs_d}, %${p}_qt`) + emit(` %${p}_rd =d sub ${lhs_d}, %${p}_m`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(` jmp @${p}_done`) + emit(`@${p}_bad`) + s_write(a1, text(qbe.js_null)) + emit(`@${p}_done`) + continue + } + if (op == "max" || op == "min") { + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + if (op == "max") { + emit(` %${p}_take_l =w cgtd ${lhs_d}, ${rhs_d}`) + } else { + emit(` %${p}_take_l =w cltd ${lhs_d}, ${rhs_d}`) + } + emit(` jnz %${p}_take_l, @${p}_lhs, @${p}_rhs`) + emit(`@${p}_lhs`) + emit(` %${p}_rd =d copy ${lhs_d}`) + emit(` jmp @${p}_done_math`) + emit(`@${p}_rhs`) + emit(` %${p}_rd =d copy ${rhs_d}`) + emit(`@${p}_done_math`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "abs") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_rd =d call $fabs(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, 
`%${p}_r`) + continue + } + if (op == "sign") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_lt0 =w cltd ${lhs_d}, d_0.0`) + emit(` jnz %${p}_lt0, @${p}_neg, @${p}_chk_pos`) + emit(`@${p}_chk_pos`) + emit(` %${p}_gt0 =w cgtd ${lhs_d}, d_0.0`) + emit(` jnz %${p}_gt0, @${p}_pos, @${p}_zero`) + emit(`@${p}_neg`) + s_write(a1, text(-2)) + emit(` jmp @${p}_done`) + emit(`@${p}_pos`) + s_write(a1, text(2)) + emit(` jmp @${p}_done`) + emit(`@${p}_zero`) + s_write(a1, text(0)) + emit(`@${p}_done`) + continue + } + if (op == "fraction") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_ti =d call $trunc(d ${lhs_d})`) + emit(` %${p}_rd =d sub ${lhs_d}, %${p}_ti`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "integer") { + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_rd =d call $trunc(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + continue + } + if (op == "floor" || op == "ceiling" || op == "round" || op == "trunc") { + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_lhs_num =w copy ${emit_is_num_w(lhs)}`) + emit(` jnz %${p}_lhs_num, @${p}_place, @${p}_bad`) + emit(`@${p}_place`) + emit(` %${p}_t1 =l and ${rhs}, 1`) + emit(` %${p}_is_int =w ceql %${p}_t1, 0`) + emit(` jnz %${p}_is_int, @${p}_pi_int, @${p}_pi_not_int`) + emit(`@${p}_pi_int`) + emit(` %${p}_pil =l sar ${rhs}, 1`) + emit(` %${p}_piw =w copy %${p}_pil`) + emit(` jmp @${p}_pi_done`) + emit(`@${p}_pi_not_int`) + emit(` %${p}_t5 =l and ${rhs}, 31`) + emit(` %${p}_is_null =w ceql %${p}_t5, 7`) + emit(` jnz %${p}_is_null, @${p}_pi_zero, @${p}_pi_chk_bool`) + emit(`@${p}_pi_zero`) + emit(` %${p}_piw =w copy 0`) + emit(` jmp @${p}_pi_done`) + emit(`@${p}_pi_chk_bool`) + emit(` %${p}_is_bool =w ceql %${p}_t5, 3`) + emit(` jnz %${p}_is_bool, 
@${p}_pi_bool, @${p}_pi_chk_float`) + emit(`@${p}_pi_bool`) + emit(` %${p}_bl =l shr ${rhs}, 5`) + emit(` %${p}_bw =w copy %${p}_bl`) + emit(` %${p}_piw =w and %${p}_bw, 1`) + emit(` jmp @${p}_pi_done`) + emit(`@${p}_pi_chk_float`) + emit(` %${p}_t3 =l and ${rhs}, 7`) + emit(` %${p}_is_float =w ceql %${p}_t3, 5`) + emit(` jnz %${p}_is_float, @${p}_pi_float, @${p}_bad`) + emit(`@${p}_pi_float`) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_piw =w dtosi ${rhs_d}`) + emit(`@${p}_pi_done`) + emit(` %${p}_is_zero =w ceqw %${p}_piw, 0`) + emit(` jnz %${p}_is_zero, @${p}_direct, @${p}_scaled`) + emit(`@${p}_direct`) + if (op == "floor") { + emit(` %${p}_rd =d call $floor(d ${lhs_d})`) + } else if (op == "ceiling") { + emit(` %${p}_rd =d call $ceil(d ${lhs_d})`) + } else if (op == "round") { + emit(` %${p}_rd =d call $round(d ${lhs_d})`) + } else { + emit(` %${p}_rd =d call $trunc(d ${lhs_d})`) + } + emit(` jmp @${p}_store`) + emit(`@${p}_scaled`) + emit(` %${p}_pl =l extsw %${p}_piw`) + emit(` %${p}_pd =d sltof %${p}_pl`) + emit(` %${p}_negpd =d neg %${p}_pd`) + emit(` %${p}_mult =d call $pow(d d_10.0, d %${p}_negpd)`) + emit(` %${p}_sd =d mul ${lhs_d}, %${p}_mult`) + if (op == "floor") { + emit(` %${p}_sr =d call $floor(d %${p}_sd)`) + } else if (op == "ceiling") { + emit(` %${p}_sr =d call $ceil(d %${p}_sd)`) + } else if (op == "round") { + emit(` %${p}_sr =d call $round(d %${p}_sd)`) + } else { + emit(` %${p}_sr =d call $trunc(d %${p}_sd)`) + } + emit(` %${p}_rd =d div %${p}_sr, %${p}_mult`) + emit(` jmp @${p}_store`) + emit(`@${p}_bad`) + s_write(a1, text(qbe.js_null)) + emit(` jmp @${p}_done`) + emit(`@${p}_store`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) + emit(`@${p}_done`) continue } if (op == "negate") { - emit(` %fp =l call $__neg_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) - emit_exc_check() + lhs = s_read(a2) + p = fresh() + lhs_d = emit_num_to_double(lhs) + emit(` %${p}_rd =d neg ${lhs_d}`) + emit(` 
%${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } if (op == "pow") { - emit(` %fp =l call $__pow_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) - emit_exc_check() + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + lhs_d = emit_num_to_double(lhs) + rhs_d = emit_num_to_double(rhs) + emit(` %${p}_rd =d call $pow(d ${lhs_d}, d ${rhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_rd)`) + s_write(a1, `%${p}_r`) continue } @@ -893,27 +2017,45 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Type checks — use qbe.cm macros (no GC, no refresh) --- if (op == "is_int") { - emit(` call $__is_int_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + p = fresh() + emit(` %${p}_tag =l and ${v}, 1`) + emit(` %${p}_w =w ceql %${p}_tag, 0`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_text") { - emit(` call $__is_text_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + s_write(a1, emit_pack_bool_js(emit_is_text_w(v))) continue } if (op == "is_num") { - emit(` call $__is_num_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + s_write(a1, emit_pack_bool_js(emit_is_num_w(v))) continue } if (op == "is_bool") { - emit(` call $__is_bool_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + p = fresh() + emit(` %${p}_t5 =l and ${v}, 31`) + emit(` %${p}_w =w ceql %${p}_t5, 3`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_null") { - emit(` call $__is_null_ss(l %fp, l ${text(a1)}, l ${text(a2)})`) + v = s_read(a2) + p = fresh() + emit(` %${p}_t5 =l and ${v}, 31`) + emit(` %${p}_w =w ceql %${p}_t5, 7`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == "is_identical") { - emit(` call $__is_identical_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_w =w ceql ${lhs}, ${rhs}`) + s_write(a1, emit_pack_bool_js(`%${p}_w`)) continue } if (op == 
"is_array") { @@ -940,27 +2082,93 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Comparisons (int path, no GC) --- if (op == "eq_int") { - emit(` call $__eq_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w ceqw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "ne_int") { - emit(` call $__ne_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w cnew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "lt_int") { - emit(` call $__lt_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csltw %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "gt_int") { - emit(` call $__gt_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csgtw %${p}_aiw, 
%${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "le_int") { - emit(` call $__le_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w cslew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } if (op == "ge_int") { - emit(` call $__ge_int_ss(l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) + lhs = s_read(a2) + rhs = s_read(a3) + p = fresh() + emit(` %${p}_ai =l sar ${lhs}, 1`) + emit(` %${p}_bi =l sar ${rhs}, 1`) + emit(` %${p}_aiw =w copy %${p}_ai`) + emit(` %${p}_biw =w copy %${p}_bi`) + emit(` %${p}_cr =w csgew %${p}_aiw, %${p}_biw`) + emit(` %${p}_crext =l extuw %${p}_cr`) + emit(` %${p}_sh =l shl %${p}_crext, 5`) + emit(` %${p}_r =l or %${p}_sh, 3`) + s_write(a1, `%${p}_r`) continue } @@ -1077,7 +2285,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__load_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1102,7 +2310,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__load_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__load_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l 
${text(a2)}, l ${text(a3)})`) } @@ -1123,7 +2331,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__store_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1150,7 +2358,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__store_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__store_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1177,39 +2385,81 @@ var qbe_emit = function(ir, qbe, export_name) { // --- Control flow --- if (op == "jump") { - emit(` jmp @${sanitize(a1)}`) + j_lbl = sanitize(a1) + j_idx = label_pos[j_lbl] + if (j_idx != null && j_idx < instr_idx) { + emit_backedge_branch(j_lbl) + } else { + emit(` jmp @${j_lbl}`) + } last_was_term = true continue } if (op == "jump_true") { v = s_read(a1) p = fresh() - emit(` %${p} =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`) + jt_lbl = sanitize(a2) + jt_idx = label_pos[jt_lbl] + jt_backedge = jt_idx != null && jt_idx < instr_idx + truthy = emit_truthy_w(v) + emit(` jnz ${truthy}, @${p}_take, @${p}_f`) + emit(`@${p}_take`) + if (jt_backedge) { + emit_backedge_branch(jt_lbl) + } else { + emit(` jmp @${jt_lbl}`) + } emit(`@${p}_f`) continue } if (op == "jump_false") { v = s_read(a1) p = fresh() - emit(` %${p} =w call $JS_ToBool(l %ctx, l ${v})`) - emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`) + jf_lbl = sanitize(a2) + jf_idx = label_pos[jf_lbl] + jf_backedge = jf_idx != null && jf_idx < instr_idx + truthy = emit_truthy_w(v) + emit(` jnz 
${truthy}, @${p}_t, @${p}_take`) + emit(`@${p}_take`) + if (jf_backedge) { + emit_backedge_branch(jf_lbl) + } else { + emit(` jmp @${jf_lbl}`) + } emit(`@${p}_t`) continue } if (op == "jump_null") { v = s_read(a1) p = fresh() + jn_lbl = sanitize(a2) + jn_idx = label_pos[jn_lbl] + jn_backedge = jn_idx != null && jn_idx < instr_idx emit(` %${p} =w ceql ${v}, ${text(qbe.js_null)}`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_nn`) + if (jn_backedge) { + emit(` jnz %${p}, @${p}_bn, @${p}_nn`) + emit(`@${p}_bn`) + emit_backedge_branch(jn_lbl) + } else { + emit(` jnz %${p}, @${jn_lbl}, @${p}_nn`) + } emit(`@${p}_nn`) continue } if (op == "jump_not_null") { v = s_read(a1) p = fresh() + jnn_lbl = sanitize(a2) + jnn_idx = label_pos[jnn_lbl] + jnn_backedge = jnn_idx != null && jnn_idx < instr_idx emit(` %${p} =w cnel ${v}, ${text(qbe.js_null)}`) - emit(` jnz %${p}, @${sanitize(a2)}, @${p}_n`) + if (jnn_backedge) { + emit(` jnz %${p}, @${p}_bn, @${p}_n`) + emit(`@${p}_bn`) + emit_backedge_branch(jnn_lbl) + } else { + emit(` jnz %${p}, @${jnn_lbl}, @${p}_n`) + } emit(`@${p}_n`) continue } @@ -1224,17 +2474,41 @@ var qbe_emit = function(ir, qbe, export_name) { if (op == "setarg") { v = s_read(a1) lhs = s_read(a3) - emit(` call $cell_rt_setarg(l ${v}, l ${text(a2)}, l ${lhs})`) + p = fresh() + // JSFrame layout: [header,function,caller,address,slots...] + // slots start at byte offset 32. + emit(` %${p}_fr =l and ${v}, -8`) + emit(` %${p}_slot =l add %${p}_fr, ${text(32 + a2 * 8)}`) + emit(` storel ${lhs}, %${p}_slot`) continue } - if (op == "invoke") { - emit(` %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) - emit_exc_check() - continue - } - if (op == "tail_invoke") { - emit(` %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) - emit_exc_check() + if (op == "invoke" || op == "tail_invoke") { + if (use_invoke_trampoline) { + // Signal dispatcher to call frame in slot a1 and resume at @_segN. 
+ seg_counter = seg_counter + 1 + resume_val = seg_counter * 65536 + a2 + p = fresh() + emit(` %${p}_addrp =l sub %fp, 8`) + // frame->address holds JS_NewInt32((seg << 16) | ret_slot), tagged. + emit(` storel ${text(resume_val * 2)}, %${p}_addrp`) + emit(` call $cell_rt_signal_call(l %ctx, l %fp, l ${text(a1)})`) + emit(` ret ${text(qbe.js_null)}`) + emit(`@_seg${text(seg_counter)}`) + // Dispatcher writes JS_EXCEPTION into ret slot on error; branch here. + v = s_read(a2) + emit(` %${p}_exc =w ceql ${v}, ${text(qbe.js_exception)}`) + if (has_handler && !in_handler) { + emit(` jnz %${p}_exc, @disruption_handler, @${p}_ok`) + } else { + needs_exc_ret = true + emit(` jnz %${p}_exc, @_exc_ret, @${p}_ok`) + } + emit(`@${p}_ok`) + } else { + // Direct helper invoke path (disabled by default). + emit(` %fp =l call $__invoke_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)})`) + emit_exc_check() + } continue } if (op == "goframe") { @@ -1243,21 +2517,29 @@ var qbe_emit = function(ir, qbe, export_name) { continue } if (op == "goinvoke") { - v = s_read(a1) - p = fresh() - emit(` %${p} =l call $cell_rt_goinvoke(l %ctx, l ${v})`) - chk = fresh() - emit(` %${chk} =w ceql %${p}, 15`) - if (has_handler) { - emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) - emit(`@${chk}_ok`) - refresh_fp() - emit(` ret %${p}`) + if (use_invoke_trampoline) { + // Tail call via dispatcher: no resume in this frame. + emit(` call $cell_rt_signal_tail_call(l %ctx, l %fp, l ${text(a1)})`) + emit(` ret ${text(qbe.js_null)}`) } else { - needs_exc_ret = true - emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) - emit(`@${chk}_ok`) - emit(` ret %${p}`) + // Direct helper goinvoke path (disabled by default). 
+ v = s_read(a1) + p = fresh() + emit(` %${p}_r =l call $cell_rt_goinvoke(l %ctx, l ${v})`) + emit(` %${p}_exc =w ceql %${p}_r, ${text(qbe.js_exception)}`) + if (has_handler && !in_handler) { + emit(` jnz %${p}_exc, @${p}_exc, @${p}_ok`) + emit(`@${p}_exc`) + emit(` %fp =l call $cell_rt_refresh_fp(l %ctx)`) + emit(` jmp @disruption_handler`) + emit(`@${p}_ok`) + emit(` ret %${p}_r`) + } else { + needs_exc_ret = true + emit(` jnz %${p}_exc, @_exc_ret, @${p}_ok`) + emit(`@${p}_ok`) + emit(` ret %${p}_r`) + } } last_was_term = true continue @@ -1267,10 +2549,12 @@ var qbe_emit = function(ir, qbe, export_name) { if (op == "function") { fn_arity = 0 + fn_nr_slots = 0 if (a2 >= 0 && a2 < length(ir.functions)) { fn_arity = ir.functions[a2].nr_args + fn_nr_slots = ir.functions[a2].nr_slots } - emit(` %fp =l call $__function_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(fn_arity)})`) + emit(` %fp =l call $__function_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(fn_arity)}, l ${text(fn_nr_slots)})`) emit_exc_check() continue } @@ -1340,7 +2624,7 @@ var qbe_emit = function(ir, qbe, export_name) { } if (pn != null) { sl = intern_str(pn) - emit(` %fp =l call $__delete_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${sl})`) + emit(` %fp =l call $__delete_field_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(sl.idx)})`) } else { emit(` %fp =l call $__delete_dynamic_ss(l %ctx, l %fp, l ${text(a1)}, l ${text(a2)}, l ${text(a3)})`) } @@ -1363,7 +2647,7 @@ var qbe_emit = function(ir, qbe, export_name) { // IR: ["regexp", dest_slot, pattern_string, flags_string] pat_label = intern_str(a2) flg_label = intern_str(a3) - emit(` %fp =l call $__regexp_ss(l %ctx, l %fp, l ${text(a1)}, l ${pat_label}, l ${flg_label})`) + emit(` %fp =l call $__regexp_ss(l %ctx, l %fp, l ${text(a1)}, l ${pat_label.label}, l ${flg_label.label})`) emit_exc_check() continue } @@ -1407,6 +2691,20 @@ var qbe_emit = function(ir, qbe, export_name) { compile_fn(ir.main, -1, true) 
fn_bodies[] = text(out, "\n") + // Export nr_slots for main function so the module loader can use right-sized frames + var main_name = export_name ? sanitize(export_name) : "cell_main" + push(data_out, `export data $${main_name}_nr_slots = { w ${text(ir.main.nr_slots)} }`) + push(data_out, `export data $cell_lit_count = { w ${text(length(str_entries))} }`) + if (length(str_entries) > 0) { + lit_data = [] + si = 0 + while (si < length(str_entries)) { + push(lit_data, `l ${str_entries[si].label}`) + si = si + 1 + } + push(data_out, `export data $cell_lit_table = { ${text(lit_data, ", ")} }`) + } + return { data: text(data_out, "\n"), functions: fn_bodies, diff --git a/source/cell.c b/source/cell.c index a8ae53f5..0bb7d4c2 100644 --- a/source/cell.c +++ b/source/cell.c @@ -37,6 +37,7 @@ static char *compute_blake2_hex(const char *data, size_t size) { uint8_t hash[32]; crypto_blake2b(hash, 32, (const uint8_t *)data, size); char *hex = malloc(65); + if (!hex) return NULL; for (int i = 0; i < 32; i++) snprintf(hex + i * 2, 3, "%02x", hash[i]); return hex; @@ -64,6 +65,7 @@ static int write_cache_file(const char *path, const uint8_t *data, size_t size) // Returns heap-allocated binary data and sets *out_size, or NULL on failure static char *load_or_cache_bootstrap(const char *mcode_data, size_t mcode_size, size_t *out_size) { char *hex = compute_blake2_hex(mcode_data, mcode_size); + if (!hex) return NULL; char *cpath = build_cache_path(hex); free(hex); @@ -222,6 +224,7 @@ static char *try_engine_cache(size_t *out_size) { char *hex = compute_blake2_hex(src, src_size); free(src); + if (!hex) return NULL; char *cpath = build_cache_path(hex); if (!cpath) { free(hex); return NULL; } free(hex); diff --git a/source/mach.c b/source/mach.c index 60bcac94..fa74b598 100644 --- a/source/mach.c +++ b/source/mach.c @@ -460,6 +460,33 @@ JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count) { return frame; } +static JSValue js_new_register_code(JSContext *ctx, 
JSCodeRegister *code) { + JSCode *jc; + if (!code) return JS_EXCEPTION; + jc = ct_alloc(ctx, sizeof(JSCode), 8); + if (!jc) return JS_EXCEPTION; + memset(jc, 0, sizeof(JSCode)); + jc->header = objhdr_make(0, OBJ_CODE, 0, 0, 0, 0); + jc->kind = JS_CODE_KIND_REGISTER; + jc->arity = (int16_t)code->arity; + jc->u.reg.code = code; + return JS_MKPTR(jc); +} + +static JSValue js_new_native_code(JSContext *ctx, void *fn_ptr, void *dl_handle, + uint16_t nr_slots, int arity) { + JSCode *jc = ct_alloc(ctx, sizeof(JSCode), 8); + if (!jc) return JS_EXCEPTION; + memset(jc, 0, sizeof(JSCode)); + jc->header = objhdr_make(0, OBJ_CODE, 0, 0, 0, 0); + jc->kind = JS_CODE_KIND_NATIVE; + jc->arity = (int16_t)arity; + jc->u.native.fn_ptr = fn_ptr; + jc->u.native.dl_handle = dl_handle; + jc->u.native.nr_slots = nr_slots; + return JS_MKPTR(jc); +} + /* Create a register-based function from JSCodeRegister */ JSValue js_new_register_function(JSContext *ctx, JSCodeRegister *code, JSValue env, JSValue outer_frame) { /* Protect env and outer_frame from GC — js_mallocz can trigger @@ -470,24 +497,84 @@ JSValue js_new_register_function(JSContext *ctx, JSCodeRegister *code, JSValue e JS_PushGCRef(ctx, &frame_ref); frame_ref.val = outer_frame; - JSFunction *fn = js_mallocz(ctx, sizeof(JSFunction)); + JSGCRef fn_ref; + JSFunction *fn; + JSValue code_obj; + + JS_AddGCRef(ctx, &fn_ref); + fn_ref.val = JS_NULL; + + fn = js_mallocz(ctx, sizeof(JSFunction)); if (!fn) { + JS_DeleteGCRef(ctx, &fn_ref); JS_PopGCRef(ctx, &frame_ref); JS_PopGCRef(ctx, &env_ref); return JS_EXCEPTION; } + fn_ref.val = JS_MKPTR(fn); fn->header = objhdr_make(0, OBJ_FUNCTION, 0, 0, 0, 0); fn->kind = JS_FUNC_KIND_REGISTER; fn->length = code->arity; fn->name = code->name; - fn->u.reg.code = code; - fn->u.reg.env_record = env_ref.val; - fn->u.reg.outer_frame = frame_ref.val; + code_obj = js_new_register_code(ctx, code); + if (JS_IsException(code_obj)) { + JS_DeleteGCRef(ctx, &fn_ref); + JS_PopGCRef(ctx, &frame_ref); + 
JS_PopGCRef(ctx, &env_ref); + return JS_EXCEPTION; + } + fn = JS_VALUE_GET_FUNCTION(fn_ref.val); + fn->u.cell.code = code_obj; + fn->u.cell.env_record = env_ref.val; + fn->u.cell.outer_frame = frame_ref.val; + JSValue out = fn_ref.val; + JS_DeleteGCRef(ctx, &fn_ref); JS_PopGCRef(ctx, &frame_ref); JS_PopGCRef(ctx, &env_ref); - return JS_MKPTR(fn); + return out; +} + +JSValue js_new_native_function_with_code(JSContext *ctx, JSValue code_obj, int arity, JSValue outer_frame) { + JSGCRef frame_ref; + JSGCRef fn_ref; + JSFunction *fn; + JS_PushGCRef(ctx, &frame_ref); + frame_ref.val = outer_frame; + JS_AddGCRef(ctx, &fn_ref); + fn_ref.val = JS_NULL; + + fn = js_mallocz(ctx, sizeof(JSFunction)); + if (!fn) { + JS_DeleteGCRef(ctx, &fn_ref); + JS_PopGCRef(ctx, &frame_ref); + return JS_EXCEPTION; + } + fn_ref.val = JS_MKPTR(fn); + + fn->header = objhdr_make(0, OBJ_FUNCTION, 0, 0, 0, 0); + fn->kind = JS_FUNC_KIND_NATIVE; + fn->length = arity; + fn->name = JS_NULL; + fn = JS_VALUE_GET_FUNCTION(fn_ref.val); + fn->u.cell.code = code_obj; + fn->u.cell.env_record = JS_NULL; + fn->u.cell.outer_frame = frame_ref.val; + + JSValue out = fn_ref.val; + JS_DeleteGCRef(ctx, &fn_ref); + JS_PopGCRef(ctx, &frame_ref); + return out; +} + +/* Create a native (QBE-compiled) function */ +JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, + uint16_t nr_slots, int arity, JSValue outer_frame) { + JSValue code_obj = js_new_native_code(ctx, fn_ptr, dl_handle, nr_slots, arity); + if (JS_IsException(code_obj)) + return JS_EXCEPTION; + return js_new_native_function_with_code(ctx, code_obj, arity, outer_frame); } /* Binary operations helper */ @@ -521,7 +608,7 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { return JS_NewFloat64(ctx, (double)ia / (double)ib); case MACH_MOD: if (ib == 0) return JS_NULL; - return JS_NewInt32(ctx, ia % ib); + return JS_NewFloat64(ctx, (double)ia - ((double)ib * floor((double)ia / (double)ib))); case MACH_EQ: return 
JS_NewBool(ctx, ia == ib); case MACH_NEQ: @@ -647,8 +734,9 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { } case MACH_MOD: { if (db == 0.0) return JS_NULL; - double r = fmod(da, db); - if (!isfinite(r)) return JS_NULL; + if (isnan(da) || isnan(db)) return JS_NULL; + if (da == 0.0) return JS_NewFloat64(ctx, 0.0); + double r = da - (db * floor(da / db)); return JS_NewFloat64(ctx, r); } case MACH_POW: { @@ -678,6 +766,34 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) { return JS_RaiseDisrupt(ctx, "type mismatch in binary operation"); } +static inline int mach_get_number(JSValue v, double *out) { + uint32_t tag = JS_VALUE_GET_TAG(v); + if (tag == JS_TAG_INT) { + *out = (double)JS_VALUE_GET_INT(v); + return 0; + } + if (JS_TAG_IS_FLOAT64(tag)) { + *out = JS_VALUE_GET_FLOAT64(v); + return 0; + } + return -1; +} + +static inline int mach_get_place(JSContext *ctx, JSValue v, int32_t *out) { + uint32_t tag = JS_VALUE_GET_NORM_TAG(v); + if (tag == JS_TAG_INT || tag == JS_TAG_BOOL || tag == JS_TAG_NULL || tag == JS_TAG_FLOAT64) { + return JS_ToInt32(ctx, out, v); + } + return -1; +} + +static inline double mach_apply_place(double d, int32_t place, double (*f)(double)) { + if (place == 0) + return f(d); + double mult = pow(10.0, -(double)place); + return f(d * mult) / mult; +} + #ifdef HAVE_ASAN void __asan_on_error(void) { @@ -695,8 +811,8 @@ void __asan_on_error(void) { const char *file = NULL; uint16_t line = 0; uint32_t pc = is_first ? 
cur_pc : 0; - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - JSCodeRegister *code = fn->u.reg.code; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + JSCodeRegister *code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; file = code->filename_cstr; func_name = code->name_cstr; if (!is_first) @@ -732,8 +848,8 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, ctx->suspended_frame_ref.val = JS_NULL; frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; - env = fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; + env = fn->u.cell.env_record; pc = ctx->suspended_pc; result = JS_NULL; #ifdef HAVE_ASAN @@ -830,6 +946,12 @@ vm_dispatch: DT(MACH_MUL), DT(MACH_DIV), DT(MACH_MOD), DT(MACH_POW), DT(MACH_NEG), + DT(MACH_REMAINDER), DT(MACH_MAX), + DT(MACH_MIN), DT(MACH_ABS), + DT(MACH_SIGN), DT(MACH_FRACTION), + DT(MACH_INTEGER), DT(MACH_FLOOR), + DT(MACH_CEILING), DT(MACH_ROUND), + DT(MACH_TRUNC), DT(MACH_EQ), DT(MACH_NEQ), DT(MACH_LT), DT(MACH_LE), DT(MACH_GT), DT(MACH_GE), @@ -1017,17 +1139,15 @@ vm_dispatch: } VM_CASE(MACH_MOD): { JSValue left = frame->slots[b], right = frame->slots[c]; - if (JS_VALUE_IS_BOTH_INT(left, right)) { - int32_t ib = JS_VALUE_GET_INT(right); - frame->slots[a] = (ib != 0) ? JS_NewInt32(ctx, JS_VALUE_GET_INT(left) % ib) : JS_NULL; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0 || db == 0.0 || + isnan(da) || isnan(db)) { + frame->slots[a] = JS_NULL; } else { - double da, db, r; - JS_ToFloat64(ctx, &da, left); - JS_ToFloat64(ctx, &db, right); - if (db == 0.0) { frame->slots[a] = JS_NULL; } - else { - r = fmod(da, db); - frame->slots[a] = !isfinite(r) ? 
JS_NULL : JS_NewFloat64(ctx, r); + if (da == 0.0) { + frame->slots[a] = JS_NewFloat64(ctx, 0.0); + } else { + frame->slots[a] = JS_NewFloat64(ctx, da - (db * floor(da / db))); } } VM_BREAK(); @@ -1051,6 +1171,116 @@ vm_dispatch: VM_BREAK(); } + VM_CASE(MACH_REMAINDER): { + JSValue left = frame->slots[b], right = frame->slots[c]; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0 || db == 0.0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, da - (trunc(da / db) * db)); + } + VM_BREAK(); + } + + VM_CASE(MACH_MAX): { + JSValue left = frame->slots[b], right = frame->slots[c]; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, da > db ? da : db); + } + VM_BREAK(); + } + + VM_CASE(MACH_MIN): { + JSValue left = frame->slots[b], right = frame->slots[c]; + double da, db; + if (mach_get_number(left, &da) != 0 || mach_get_number(right, &db) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, da < db ? 
da : db); + } + VM_BREAK(); + } + + VM_CASE(MACH_ABS): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, fabs(d)); + } + VM_BREAK(); + } + + VM_CASE(MACH_SIGN): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else if (d < 0) { + frame->slots[a] = JS_NewInt32(ctx, -1); + } else if (d > 0) { + frame->slots[a] = JS_NewInt32(ctx, 1); + } else { + frame->slots[a] = JS_NewInt32(ctx, 0); + } + VM_BREAK(); + } + + VM_CASE(MACH_FRACTION): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, d - trunc(d)); + } + VM_BREAK(); + } + + VM_CASE(MACH_INTEGER): { + JSValue v = frame->slots[b]; + double d; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + } else { + frame->slots[a] = JS_NewFloat64(ctx, trunc(d)); + } + VM_BREAK(); + } + + VM_CASE(MACH_FLOOR): + VM_CASE(MACH_CEILING): + VM_CASE(MACH_ROUND): + VM_CASE(MACH_TRUNC): { + JSValue v = frame->slots[b]; + JSValue pval = frame->slots[c]; + double d, r; + int32_t place = 0; + if (mach_get_number(v, &d) != 0) { + frame->slots[a] = JS_NULL; + VM_BREAK(); + } + if (!JS_IsNull(pval) && mach_get_place(ctx, pval, &place) != 0) { + frame->slots[a] = JS_NULL; + VM_BREAK(); + } + if (op == MACH_FLOOR) { + r = mach_apply_place(d, place, floor); + } else if (op == MACH_CEILING) { + r = mach_apply_place(d, place, ceil); + } else if (op == MACH_ROUND) { + r = mach_apply_place(d, place, round); + } else { + r = mach_apply_place(d, place, trunc); + } + frame->slots[a] = JS_NewFloat64(ctx, r); + VM_BREAK(); + } + /* Comparison — inline integer fast paths */ VM_CASE(MACH_EQ): { JSValue left = frame->slots[b], right = frame->slots[c]; @@ -1330,7 +1560,7 @@ vm_dispatch: /* Read env fresh from frame->function — C local env can go stale after GC */ 
int bx = MACH_GET_Bx(instr); JSValue key = code->cpool[bx]; - JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.reg.env_record; + JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.cell.env_record; JSValue val = JS_GetProperty(ctx, cur_env, key); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); frame->slots[a] = val; @@ -1342,7 +1572,7 @@ vm_dispatch: int bx = MACH_GET_Bx(instr); JSValue key = code->cpool[bx]; JSValue val = JS_NULL; - JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.reg.env_record; + JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.cell.env_record; if (!JS_IsNull(cur_env)) { val = JS_GetProperty(ctx, cur_env, key); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); @@ -1359,7 +1589,7 @@ vm_dispatch: /* R(A) = outer_frame[B].slots[C] — walk lexical scope chain */ int depth = b; JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); if (!target) { fprintf(stderr, "GETUP: NULL outer_frame at depth 0! pc=%d a=%d depth=%d slot=%d nr_slots=%d instr=0x%08x\n", pc-1, a, depth, c, code->nr_slots, instr); @@ -1368,7 +1598,7 @@ vm_dispatch: } for (int d = 1; d < depth; d++) { fn = JS_VALUE_GET_FUNCTION(target->function); - JSFrameRegister *next = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + JSFrameRegister *next = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); if (!next) { fprintf(stderr, "GETUP: NULL outer_frame at depth %d! 
pc=%d a=%d depth=%d slot=%d nr_slots=%d instr=0x%08x\n", d, pc-1, a, depth, c, code->nr_slots, instr); @@ -1385,10 +1615,10 @@ vm_dispatch: /* outer_frame[B].slots[C] = R(A) — walk lexical scope chain */ int depth = b; JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + JSFrameRegister *target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); for (int d = 1; d < depth; d++) { fn = JS_VALUE_GET_FUNCTION(target->function); - target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.reg.outer_frame); + target = (JSFrameRegister *)JS_VALUE_GET_PTR(fn->u.cell.outer_frame); } target->slots[c] = frame->slots[a]; VM_BREAK(); @@ -1482,9 +1712,9 @@ vm_dispatch: const char *callee_file = "?"; { JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(frame->function); - if (callee_fn->kind == JS_FUNC_KIND_REGISTER && callee_fn->u.reg.code) { - if (callee_fn->u.reg.code->name_cstr) callee_name = callee_fn->u.reg.code->name_cstr; - if (callee_fn->u.reg.code->filename_cstr) callee_file = callee_fn->u.reg.code->filename_cstr; + if (callee_fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code) { + if (JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->name_cstr) callee_name = JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->name_cstr; + if (JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->filename_cstr) callee_file = JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.reg.code->filename_cstr; } } #endif @@ -1494,8 +1724,8 @@ vm_dispatch: frame_ref.val = JS_MKPTR(frame); int ret_info = JS_VALUE_GET_INT(frame->address); JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; - env = fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; + env = fn->u.cell.env_record; pc = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; if (ret_slot != 0xFFFF) { @@ -1527,8 +1757,8 @@ vm_dispatch: frame_ref.val = 
JS_MKPTR(frame); int ret_info = JS_VALUE_GET_INT(frame->address); JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; - env = fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; + env = fn->u.cell.env_record; pc = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; if (ret_slot != 0xFFFF) frame->slots[ret_slot] = result; @@ -1556,7 +1786,7 @@ vm_dispatch: if ((uint32_t)bx < code->func_count) { JSCodeRegister *fn_code = code->functions[bx]; /* Read env fresh from frame->function — C local can be stale */ - JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.reg.env_record; + JSValue cur_env = JS_VALUE_GET_FUNCTION(frame->function)->u.cell.env_record; JSValue fn_val = js_new_register_function(ctx, fn_code, cur_env, frame_ref.val); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); frame->slots[a] = fn_val; @@ -1943,7 +2173,7 @@ vm_dispatch: if (fn->kind == JS_FUNC_KIND_REGISTER) { /* Register function: switch frames inline (fast path) */ - JSCodeRegister *fn_code = fn->u.reg.code; + JSCodeRegister *fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots); if (!new_frame) { frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); @@ -1953,7 +2183,7 @@ vm_dispatch: fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]); fn_val = fr->function; fn = JS_VALUE_GET_FUNCTION(fn_val); - fn_code = fn->u.reg.code; + fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; new_frame->function = fn_val; /* Copy this + args from call frame to new frame */ int copy_count = (c_argc < fn_code->arity) ? 
c_argc : fn_code->arity; @@ -1966,16 +2196,18 @@ vm_dispatch: frame = new_frame; frame_ref.val = JS_MKPTR(frame); code = fn_code; - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->entry_point; } else { - /* C or bytecode function: args already in fr->slots (GC-protected via frame chain) */ + /* C, native, or bytecode function */ ctx->reg_current_frame = frame_ref.val; ctx->current_register_pc = pc > 0 ? pc - 1 : 0; ctx->vm_call_depth++; JSValue ret; if (fn->kind == JS_FUNC_KIND_C) ret = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); + else if (fn->kind == JS_FUNC_KIND_NATIVE) + ret = cell_native_dispatch(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); else ret = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1], 0); ctx->vm_call_depth--; @@ -2011,7 +2243,7 @@ vm_dispatch: JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val); if (fn->kind == JS_FUNC_KIND_REGISTER) { - JSCodeRegister *fn_code = fn->u.reg.code; + JSCodeRegister *fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; int current_slots = (int)objhdr_cap56(frame->header); if (fn_code->nr_slots <= current_slots) { @@ -2026,7 +2258,7 @@ vm_dispatch: frame->function = fn_val; /* caller stays the same — we're reusing this frame */ code = fn_code; - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->entry_point; } else { /* SLOW PATH: callee needs more slots, must allocate */ @@ -2039,7 +2271,7 @@ vm_dispatch: fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]); fn_val = fr->function; fn = JS_VALUE_GET_FUNCTION(fn_val); - fn_code = fn->u.reg.code; + fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; new_frame->function = fn_val; int copy_count = (c_argc < fn_code->arity) ? 
c_argc : fn_code->arity; new_frame->slots[0] = fr->slots[0]; /* this */ @@ -2050,17 +2282,19 @@ vm_dispatch: frame = new_frame; frame_ref.val = JS_MKPTR(frame); code = fn_code; - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->entry_point; } } else { - /* C/bytecode function: call it, then return result to our caller */ + /* C, native, or bytecode function: call it, then return result to our caller */ ctx->reg_current_frame = frame_ref.val; ctx->current_register_pc = pc > 0 ? pc - 1 : 0; ctx->vm_call_depth++; JSValue ret; if (fn->kind == JS_FUNC_KIND_C) ret = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); + else if (fn->kind == JS_FUNC_KIND_NATIVE) + ret = cell_native_dispatch(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); else ret = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1], 0); ctx->vm_call_depth--; @@ -2076,8 +2310,8 @@ vm_dispatch: frame_ref.val = JS_MKPTR(frame); int ret_info = JS_VALUE_GET_INT(frame->address); JSFunction *ret_fn = JS_VALUE_GET_FUNCTION(frame->function); - code = ret_fn->u.reg.code; - env = ret_fn->u.reg.env_record; + code = JS_VALUE_GET_CODE(ret_fn->u.cell.code)->u.reg.code; + env = ret_fn->u.cell.env_record; pc = ret_info >> 16; int ret_slot = ret_info & 0xFFFF; if (ret_slot != 0xFFFF) frame->slots[ret_slot] = ret; @@ -2129,10 +2363,10 @@ vm_dispatch: uint32_t frame_pc = pc; for (;;) { JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - code = fn->u.reg.code; + code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; /* Only enter handler if we're not already inside it */ if (code->disruption_pc > 0 && frame_pc < code->disruption_pc) { - env = fn->u.reg.env_record; + env = fn->u.cell.env_record; pc = code->disruption_pc; ctx->disruption_reported = FALSE; frame_ref.val = JS_MKPTR(frame); /* root handler frame for GC */ @@ -2373,6 +2607,17 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { else if (strcmp(op, "modulo") == 0) { ABC3(MACH_MOD); } 
else if (strcmp(op, "pow") == 0) { ABC3(MACH_POW); } else if (strcmp(op, "negate") == 0) { AB2(MACH_NEG); } + else if (strcmp(op, "remainder") == 0) { ABC3(MACH_REMAINDER); } + else if (strcmp(op, "max") == 0) { ABC3(MACH_MAX); } + else if (strcmp(op, "min") == 0) { ABC3(MACH_MIN); } + else if (strcmp(op, "abs") == 0) { AB2(MACH_ABS); } + else if (strcmp(op, "sign") == 0) { AB2(MACH_SIGN); } + else if (strcmp(op, "fraction") == 0) { AB2(MACH_FRACTION); } + else if (strcmp(op, "integer") == 0) { AB2(MACH_INTEGER); } + else if (strcmp(op, "floor") == 0) { ABC3(MACH_FLOOR); } + else if (strcmp(op, "ceiling") == 0) { ABC3(MACH_CEILING); } + else if (strcmp(op, "round") == 0) { ABC3(MACH_ROUND); } + else if (strcmp(op, "trunc") == 0) { ABC3(MACH_TRUNC); } /* Typed integer comparisons */ else if (strcmp(op, "eq_int") == 0) { ABC3(MACH_EQ_INT); } else if (strcmp(op, "ne_int") == 0) { ABC3(MACH_NE_INT); } @@ -3077,4 +3322,3 @@ void JS_DumpMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue en dump_register_code(ctx, code, 0); JS_PopGCRef(ctx, &env_ref); } - diff --git a/source/qbe_helpers.c b/source/qbe_helpers.c index 44c54943..ba90379a 100644 --- a/source/qbe_helpers.c +++ b/source/qbe_helpers.c @@ -9,6 +9,8 @@ #include "quickjs-internal.h" #include #include +#include +#include /* Non-inline wrappers for static inline functions in quickjs.h */ JSValue qbe_new_float64(JSContext *ctx, double d) { @@ -222,23 +224,210 @@ JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b) { /* --- Property access --- */ -JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) { +typedef struct { + void *dl_handle; + JSContext *ctx; + JSGCRef *vals; + int count; +} AOTLiteralPool; +typedef struct { + const char *name; + JSValue key; +} AOTKeyCacheEntry; +typedef struct { + void *dl_handle; + int64_t fn_idx; + JSValue code; +} AOTCodeCacheEntry; + +typedef struct AOTGCRefChunk AOTGCRefChunk; + +typedef struct { + void *current_dl_handle; + AOTLiteralPool 
lit_pool; + AOTKeyCacheEntry *key_cache; + int key_cache_count; + int key_cache_cap; + AOTCodeCacheEntry *code_cache; + int code_cache_count; + int code_cache_cap; + JSGCRef native_env_ref; + int has_native_env; + int native_env_ref_inited; + AOTGCRefChunk **gc_ref_chunks; + int gc_ref_chunk_count; + int aot_depth; + JSValue pending_callee_frame; + int pending_is_tail; +} NativeRTState; + +static NativeRTState *native_state(JSContext *ctx) { + NativeRTState *st = (NativeRTState *)ctx->native_state; + if (st) return st; + st = js_mallocz_rt(sizeof(*st)); + if (!st) { + JS_RaiseOOM(ctx); + return NULL; + } + ctx->native_state = st; + return st; +} + +static void aot_clear_lit_pool(JSContext *ctx, NativeRTState *st) { + if (!st) return; + if (st->lit_pool.vals) { + for (int i = 0; i < st->lit_pool.count; i++) + JS_DeleteGCRef(ctx, &st->lit_pool.vals[i]); + free(st->lit_pool.vals); + } + st->lit_pool.dl_handle = NULL; + st->lit_pool.ctx = NULL; + st->lit_pool.vals = NULL; + st->lit_pool.count = 0; +} + +static int aot_load_lit_pool(JSContext *ctx, NativeRTState *st, void *dl_handle) { + aot_clear_lit_pool(ctx, st); + st->lit_pool.dl_handle = dl_handle; + st->lit_pool.ctx = ctx; + if (!dl_handle) + return 1; + + int *count_ptr = (int *)dlsym(dl_handle, "cell_lit_count"); + const char **table_ptr = (const char **)dlsym(dl_handle, "cell_lit_table"); + int count = count_ptr ? *count_ptr : 0; + if (count <= 0 || !table_ptr) + return 1; + + st->lit_pool.vals = (JSGCRef *)calloc((size_t)count, sizeof(JSGCRef)); + if (!st->lit_pool.vals) { + JS_RaiseOOM(ctx); + return 0; + } + st->lit_pool.count = 0; + + for (int i = 0; i < count; i++) { + const char *cstr = table_ptr[i] ? 
table_ptr[i] : ""; + JS_AddGCRef(ctx, &st->lit_pool.vals[i]); + st->lit_pool.count = i + 1; + st->lit_pool.vals[i].val = js_key_new(ctx, cstr); + if (JS_IsException(st->lit_pool.vals[i].val)) { + aot_clear_lit_pool(ctx, st); + return 0; + } + } + return 1; +} + +static JSValue aot_lit_from_index(JSContext *ctx, int64_t lit_idx) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + if (lit_idx < 0) { + JS_RaiseDisrupt(ctx, "literal index out of range"); + return JS_EXCEPTION; + } + + if (st->lit_pool.dl_handle != st->current_dl_handle || st->lit_pool.ctx != ctx) { + if (!aot_load_lit_pool(ctx, st, st->current_dl_handle)) + return JS_EXCEPTION; + } + + if (lit_idx >= st->lit_pool.count) { + JS_RaiseDisrupt(ctx, "literal index out of range"); + return JS_EXCEPTION; + } + return st->lit_pool.vals[lit_idx].val; +} + +/* Convert a static C string to an interned JSValue key. + Uses a small per-actor cache keyed by C-string pointer to avoid + repeated UTF-8 decoding in hot property paths. */ +static JSValue aot_key_from_cstr(JSContext *ctx, const char *name) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + if (!name) + return JS_NULL; + + for (int i = 0; i < st->key_cache_count; i++) { + if (st->key_cache[i].name == name) + return st->key_cache[i].key; + } + + JSValue key = js_key_new(ctx, name); + if (JS_IsNull(key)) + return JS_RaiseDisrupt(ctx, "invalid property key"); + + if (st->key_cache_count >= st->key_cache_cap) { + int new_cap = st->key_cache_cap ? 
(st->key_cache_cap * 2) : 64; + AOTKeyCacheEntry *new_cache = + (AOTKeyCacheEntry *)realloc(st->key_cache, (size_t)new_cap * sizeof(*new_cache)); + if (!new_cache) + return JS_RaiseOOM(ctx); + st->key_cache = new_cache; + st->key_cache_cap = new_cap; + } + + st->key_cache[st->key_cache_count].name = name; + st->key_cache[st->key_cache_count].key = key; + st->key_cache_count++; + return key; +} + +static JSValue cell_rt_load_field_key(JSContext *ctx, JSValue obj, JSValue key) { if (JS_IsFunction(obj)) { JS_RaiseDisrupt(ctx, "cannot read property of function"); return JS_EXCEPTION; } - return JS_GetPropertyStr(ctx, obj, name); + return JS_GetProperty(ctx, obj, key); +} + +JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_load_field_key(ctx, obj, key); +} + +JSValue cell_rt_load_field_lit(JSContext *ctx, JSValue obj, int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_load_field_key(ctx, obj, key); } /* Like cell_rt_load_field but without the function guard. Used by load_dynamic when the key happens to be a static string. */ JSValue cell_rt_load_prop_str(JSContext *ctx, JSValue obj, const char *name) { - return JS_GetPropertyStr(ctx, obj, name); + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return JS_GetProperty(ctx, obj, key); } -void cell_rt_store_field(JSContext *ctx, JSValue val, JSValue obj, - const char *name) { - JS_SetPropertyStr(ctx, obj, name, val); +static int cell_rt_store_field_key(JSContext *ctx, JSValue val, JSValue obj, + JSValue key) { + int ret = JS_SetProperty(ctx, obj, key, val); + return (ret < 0 || JS_HasException(ctx)) ? 
0 : 1; +} + +int cell_rt_store_field(JSContext *ctx, JSValue val, JSValue obj, + const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return 0; + return cell_rt_store_field_key(ctx, val, obj, key); +} + +int cell_rt_store_field_lit(JSContext *ctx, JSValue val, JSValue obj, + int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return 0; + return cell_rt_store_field_key(ctx, val, obj, key); +} + +JSValue cell_rt_access_lit(JSContext *ctx, int64_t lit_idx) { + return aot_lit_from_index(ctx, lit_idx); } JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) { @@ -247,16 +436,22 @@ JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) { return JS_GetProperty(ctx, obj, key); } -void cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj, - JSValue key) { +int cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj, + JSValue key) { + int ret = 0; + JSValue nr = JS_NULL; if (JS_IsInt(key)) { - JS_SetPropertyNumber(ctx, obj, (uint32_t)JS_VALUE_GET_INT(key), val); + nr = JS_SetPropertyNumber(ctx, obj, (uint32_t)JS_VALUE_GET_INT(key), val); + return JS_IsException(nr) ? 0 : 1; } else if (JS_IsArray(obj) && !JS_IsInt(key)) { JS_RaiseDisrupt(ctx, "array index must be a number"); + return 0; } else if (JS_IsBool(key) || JS_IsNull(key) || JS_IsArray(key) || JS_IsFunction(key)) { JS_RaiseDisrupt(ctx, "object key must be text"); + return 0; } else { - JS_SetProperty(ctx, obj, key, val); + ret = JS_SetProperty(ctx, obj, key, val); + return (ret < 0 || JS_HasException(ctx)) ? 
0 : 1; } } @@ -266,41 +461,59 @@ JSValue cell_rt_load_index(JSContext *ctx, JSValue arr, JSValue idx) { return JS_GetProperty(ctx, arr, idx); } -void cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, - JSValue idx) { +int cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr, + JSValue idx) { + int ret = 0; + JSValue nr = JS_NULL; if (JS_IsInt(idx)) - JS_SetPropertyNumber(ctx, arr, (uint32_t)JS_VALUE_GET_INT(idx), val); + nr = JS_SetPropertyNumber(ctx, arr, (uint32_t)JS_VALUE_GET_INT(idx), val); else - JS_SetProperty(ctx, arr, idx, val); + ret = JS_SetProperty(ctx, arr, idx, val); + if (JS_IsInt(idx)) + return JS_IsException(nr) ? 0 : 1; + return (ret < 0 || JS_HasException(ctx)) ? 0 : 1; } /* --- Intrinsic/global lookup --- */ -/* Native module environment — set before executing a native module's cell_main. - Contains runtime functions (starts_with, ends_with, etc.) and use(). */ -static JSGCRef g_native_env_ref; -static int g_has_native_env = 0; - void cell_rt_set_native_env(JSContext *ctx, JSValue env) { + NativeRTState *st = native_state(ctx); + if (!st) return; if (!JS_IsNull(env) && !JS_IsStone(env)) { fprintf(stderr, "cell_rt_set_native_env: ERROR env not stone\n"); abort(); } - if (g_has_native_env) - JS_DeleteGCRef(ctx, &g_native_env_ref); + /* Drop module literal pool roots before switching native env/module. */ + aot_clear_lit_pool(ctx, st); + + /* Native module boundary: clear per-actor key cache so stale keys + cannot survive across context/module lifetimes. 
*/ + free(st->key_cache); + st->key_cache = NULL; + st->key_cache_count = 0; + st->key_cache_cap = 0; + + if (st->has_native_env && st->native_env_ref_inited) { + JS_DeleteGCRef(ctx, &st->native_env_ref); + st->native_env_ref_inited = 0; + } if (!JS_IsNull(env)) { - JS_AddGCRef(ctx, &g_native_env_ref); - g_native_env_ref.val = env; - g_has_native_env = 1; + JS_AddGCRef(ctx, &st->native_env_ref); + st->native_env_ref_inited = 1; + st->native_env_ref.val = env; + st->has_native_env = 1; } else { - g_has_native_env = 0; + st->has_native_env = 0; + st->native_env_ref.val = JS_NULL; } } -JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { +static JSValue cell_rt_get_intrinsic_key(JSContext *ctx, JSValue key) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; /* Check native env first (runtime-provided functions like log) */ - if (g_has_native_env) { - JSValue v = JS_GetPropertyStr(ctx, g_native_env_ref.val, name); + if (st->has_native_env) { + JSValue v = JS_GetProperty(ctx, st->native_env_ref.val, key); if (!JS_IsNull(v)) return v; } @@ -311,34 +524,57 @@ JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { JSRecord *rec = (JSRecord *)chase(gobj); uint64_t mask = objhdr_cap56(rec->mist_hdr); for (uint64_t i = 1; i <= mask; i++) { - if (js_key_equal_str(rec->slots[i].key, name)) + if (js_key_equal(rec->slots[i].key, key)) return rec->slots[i].val; } } - JS_RaiseDisrupt(ctx, "'%s' is not defined", name); + JS_RaiseDisrupt(ctx, "name is not defined"); return JS_EXCEPTION; } +JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_get_intrinsic_key(ctx, key); +} + +JSValue cell_rt_get_intrinsic_lit(JSContext *ctx, int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_get_intrinsic_key(ctx, key); +} + /* --- Closure access 
--- - Slot 511 in each frame stores the magic ID (registry index) of the - function that owns this frame. cell_rt_get/put_closure re-derive - the enclosing frame from the function's GC ref at call time, so - pointers stay valid even if GC moves frames. */ + Walk the outer_frame chain on JSFunction (JS_FUNC_KIND_NATIVE). + The frame's function field links to the JSFunction, whose + u.native.outer_frame points to the enclosing frame. + GC traces outer_frame naturally — no registry needed. */ -#define QBE_FRAME_OUTER_SLOT 511 - -static JSValue *derive_outer_fp(int magic); +/* Get the outer frame's slots from a frame pointer. + The frame's function must be JS_FUNC_KIND_NATIVE. */ +static JSValue *get_outer_frame_slots(JSValue *fp) { + /* fp points to frame->slots[0]; frame header is before it */ + JSFrameRegister *frame = (JSFrameRegister *)((char *)fp - offsetof(JSFrameRegister, slots)); + if (JS_IsNull(frame->function)) + return NULL; + JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); + if (fn->kind != JS_FUNC_KIND_NATIVE) + return NULL; + JSValue outer = fn->u.cell.outer_frame; + if (JS_IsNull(outer)) + return NULL; + JSFrameRegister *outer_frame = (JSFrameRegister *)JS_VALUE_GET_PTR(outer); + return (JSValue *)outer_frame->slots; +} JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth, int64_t slot) { + (void)ctx; JSValue *frame = (JSValue *)fp; for (int64_t d = 0; d < depth; d++) { - /* fp[511] stores the magic ID (registry index) of the function - that owns this frame. derive_outer_fp re-derives the enclosing - frame from the function's GC ref, so it's always current even - if GC moved the frame. 
*/ - int magic = (int)(int64_t)frame[QBE_FRAME_OUTER_SLOT]; - frame = derive_outer_fp(magic); + frame = get_outer_frame_slots(frame); if (!frame) return JS_NULL; } @@ -347,81 +583,134 @@ JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth, void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth, int64_t slot) { + (void)ctx; JSValue *frame = (JSValue *)fp; for (int64_t d = 0; d < depth; d++) { - int magic = (int)(int64_t)frame[QBE_FRAME_OUTER_SLOT]; - frame = derive_outer_fp(magic); + frame = get_outer_frame_slots(frame); if (!frame) return; } frame[slot] = val; } /* --- GC-managed AOT frame stack --- - Each AOT function call pushes a GC ref so the GC can find and - update frame pointers when it moves objects. cell_rt_refresh_fp - re-derives the slot pointer after any GC-triggering call. */ + Each native dispatch loop pushes a GC ref so the GC can find and + update the current frame pointer when it moves objects. + cell_rt_refresh_fp re-derives the slot pointer after any GC call. */ -#define MAX_AOT_DEPTH 65536 -static JSGCRef g_aot_gc_refs[MAX_AOT_DEPTH]; -static int g_aot_depth = 0; +// Keep GC roots for native frames in stable heap chunks (no fixed depth cap). 
+#define AOT_GC_REF_CHUNK_SIZE 1024 +typedef struct AOTGCRefChunk { + JSGCRef refs[AOT_GC_REF_CHUNK_SIZE]; + uint8_t inited[AOT_GC_REF_CHUNK_SIZE]; +} AOTGCRefChunk; -/* Check remaining C stack space to prevent segfaults from deep recursion */ -static int stack_space_ok(void) { -#ifdef __APPLE__ - char local; - void *stack_addr = pthread_get_stackaddr_np(pthread_self()); - size_t stack_size = pthread_get_stacksize_np(pthread_self()); - /* stack_addr is the TOP of the stack (highest address); stack grows down */ - uintptr_t stack_bottom = (uintptr_t)stack_addr - stack_size; - uintptr_t current = (uintptr_t)&local; - /* Keep 128KB of reserve for unwinding and error handling */ - return (current - stack_bottom) > (128 * 1024); -#else - return g_aot_depth < MAX_AOT_DEPTH; -#endif +int cell_rt_native_active(JSContext *ctx) { + NativeRTState *st = (NativeRTState *)ctx->native_state; + return st ? (st->aot_depth > 0) : 0; +} + +static int ensure_aot_gc_ref_slot(JSContext *ctx, NativeRTState *st, int depth_index) { + if (depth_index < 0) + return 0; + int needed_chunks = (depth_index / AOT_GC_REF_CHUNK_SIZE) + 1; + if (needed_chunks <= st->gc_ref_chunk_count) + return 1; + AOTGCRefChunk **new_chunks = + (AOTGCRefChunk **)realloc(st->gc_ref_chunks, + (size_t)needed_chunks * sizeof(*new_chunks)); + if (!new_chunks) { + JS_RaiseOOM(ctx); + return 0; + } + st->gc_ref_chunks = new_chunks; + for (int i = st->gc_ref_chunk_count; i < needed_chunks; i++) { + st->gc_ref_chunks[i] = (AOTGCRefChunk *)calloc(1, sizeof(AOTGCRefChunk)); + if (!st->gc_ref_chunks[i]) { + JS_RaiseOOM(ctx); + return 0; + } + } + st->gc_ref_chunk_count = needed_chunks; + return 1; +} + +static inline JSGCRef *aot_gc_ref_at(NativeRTState *st, int depth_index) { + int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; + int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; + return &st->gc_ref_chunks[chunk_index]->refs[slot_index]; +} + +static inline uint8_t *aot_gc_ref_inited_at(NativeRTState *st, int 
depth_index) { + int chunk_index = depth_index / AOT_GC_REF_CHUNK_SIZE; + int slot_index = depth_index % AOT_GC_REF_CHUNK_SIZE; + return &st->gc_ref_chunks[chunk_index]->inited[slot_index]; +} + +static inline void aot_gc_ref_activate(JSContext *ctx, NativeRTState *st, int depth_index) { + JSGCRef *ref = aot_gc_ref_at(st, depth_index); + uint8_t *inited = aot_gc_ref_inited_at(st, depth_index); + if (!*inited) { + JS_AddGCRef(ctx, ref); + *inited = 1; + } } JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) { - if (g_aot_depth >= MAX_AOT_DEPTH || !stack_space_ok()) { - JS_RaiseDisrupt(ctx, "native call stack overflow (depth %d)", g_aot_depth); + NativeRTState *st = native_state(ctx); + if (!st) return NULL; + if (!ensure_aot_gc_ref_slot(ctx, st, st->aot_depth)) { return NULL; } JSFrameRegister *frame = alloc_frame_register(ctx, (int)nr_slots); if (!frame) return NULL; - JSGCRef *ref = &g_aot_gc_refs[g_aot_depth]; - JS_AddGCRef(ctx, ref); + aot_gc_ref_activate(ctx, st, st->aot_depth); + JSGCRef *ref = aot_gc_ref_at(st, st->aot_depth); ref->val = JS_MKPTR(frame); - g_aot_depth++; + st->aot_depth++; return (JSValue *)frame->slots; } +/* Push an already-allocated frame onto the active AOT frame stack. 
*/ +static int cell_rt_push_existing_frame(JSContext *ctx, JSValue frame_val) { + NativeRTState *st = native_state(ctx); + if (!st) return 0; + if (!ensure_aot_gc_ref_slot(ctx, st, st->aot_depth)) + return 0; + aot_gc_ref_activate(ctx, st, st->aot_depth); + JSGCRef *ref = aot_gc_ref_at(st, st->aot_depth); + ref->val = frame_val; + st->aot_depth++; + return 1; +} + JSValue *cell_rt_refresh_fp(JSContext *ctx) { - (void)ctx; - if (g_aot_depth <= 0) { - fprintf(stderr, "[BUG] cell_rt_refresh_fp: g_aot_depth=%d\n", g_aot_depth); + NativeRTState *st = native_state(ctx); + if (!st) return NULL; + if (st->aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_refresh_fp: aot_depth=%d\n", st->aot_depth); abort(); } - JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; + JSValue val = aot_gc_ref_at(st, st->aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp: frame is NULL at depth=%d val=%lld\n", - g_aot_depth, (long long)val); + st->aot_depth, (long long)val); abort(); } return (JSValue *)frame->slots; } -/* Combined refresh + exception check in a single call. - Returns the refreshed fp, or NULL if there is a pending exception. - This avoids QBE register-allocation issues from two consecutive calls. */ +/* Combined refresh + exception check in a single call. 
*/ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { + NativeRTState *st = native_state(ctx); + if (!st) return NULL; if (JS_HasException(ctx)) return NULL; - if (g_aot_depth <= 0) { - fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: g_aot_depth=%d\n", g_aot_depth); + if (st->aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: aot_depth=%d\n", st->aot_depth); abort(); } - JSValue val = g_aot_gc_refs[g_aot_depth - 1].val; + JSValue val = aot_gc_ref_at(st, st->aot_depth - 1)->val; JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val); if (!frame) { fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: frame is NULL\n"); @@ -431,134 +720,415 @@ JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) { } void cell_rt_leave_frame(JSContext *ctx) { - g_aot_depth--; - JS_DeleteGCRef(ctx, &g_aot_gc_refs[g_aot_depth]); + NativeRTState *st = native_state(ctx); + if (!st) return; + if (st->aot_depth <= 0) { + fprintf(stderr, "[BUG] cell_rt_leave_frame underflow\n"); + abort(); + } + st->aot_depth--; + aot_gc_ref_at(st, st->aot_depth)->val = JS_NULL; } /* --- Function creation and calling --- */ typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp); -/* Per-module function registry. - Each native .cm module gets its own dylib. When a module creates closures - via cell_rt_make_function, we record the dylib handle so the trampoline - can look up the correct cell_fn_N in the right dylib. */ -#define MAX_NATIVE_FN 32768 +/* ============================================================ + Dispatch loop — the core of native function execution. + Each compiled cell_fn_N returns to this loop when it needs + to call another function (instead of recursing via C stack). 
+ ============================================================ */ -static struct { - void *dl_handle; - int fn_idx; - JSGCRef frame_ref; /* independent GC ref for enclosing frame */ - int has_frame_ref; -} g_native_fn_registry[MAX_NATIVE_FN]; - -static int g_native_fn_count = 0; - -/* Set before executing a native module's cell_main */ -static void *g_current_dl_handle = NULL; - -/* Derive the outer frame's slots pointer from the closure's own GC ref. - Each closure keeps an independent GC ref so the enclosing frame - survives even after cell_rt_leave_frame pops the stack ref. */ -static JSValue *derive_outer_fp(int magic) { - if (!g_native_fn_registry[magic].has_frame_ref) return NULL; - JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR( - g_native_fn_registry[magic].frame_ref.val); - return (JSValue *)frame->slots; -} - -static void reclaim_native_fns(JSContext *ctx, int saved_count) { - /* Free GC refs for temporary closures created during a call */ - for (int i = saved_count; i < g_native_fn_count; i++) { - if (g_native_fn_registry[i].has_frame_ref) { - JS_DeleteGCRef(ctx, &g_native_fn_registry[i].frame_ref); - g_native_fn_registry[i].has_frame_ref = 0; - } +/* Poll pause state on taken backward jumps (AOT backedges). + MACH can suspend/resume a register VM frame at pc granularity; native AOT + does not currently have an equivalent resume point, so we acknowledge timer + pauses by clearing pause_flag and continuing the current turn. 
*/ +int cell_rt_check_backedge(JSContext *ctx) { + int pf = atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed); + if (pf >= 1) { + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); } - g_native_fn_count = saved_count; + return 0; } -static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val, - int argc, JSValue *argv, int magic) { - if (magic < 0 || magic >= g_native_fn_count) - return JS_RaiseDisrupt(ctx, "invalid native function id %d", magic); +void cell_rt_signal_call(JSContext *ctx, void *fp, int64_t frame_slot) { + NativeRTState *st = native_state(ctx); + if (!st) return; + JSValue *slots = (JSValue *)fp; + st->pending_callee_frame = slots[frame_slot]; + st->pending_is_tail = 0; +} - void *handle = g_native_fn_registry[magic].dl_handle; - int fn_idx = g_native_fn_registry[magic].fn_idx; +void cell_rt_signal_tail_call(JSContext *ctx, void *fp, int64_t frame_slot) { + NativeRTState *st = native_state(ctx); + if (!st) return; + JSValue *slots = (JSValue *)fp; + st->pending_callee_frame = slots[frame_slot]; + st->pending_is_tail = 1; +} - char name[64]; - snprintf(name, sizeof(name), "cell_fn_%d", fn_idx); +/* Entry point called from JS_CallInternal / JS_Call / MACH_INVOKE + for JS_FUNC_KIND_NATIVE functions. 
*/ +JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, + JSValue this_obj, int argc, JSValue *argv) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + JSFunction *f = JS_VALUE_GET_FUNCTION(func_obj); + cell_compiled_fn fn = (cell_compiled_fn)JS_VALUE_GET_CODE(f->u.cell.code)->u.native.fn_ptr; + int nr_slots = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots; + int arity = f->length; + void *prev_dl_handle = st->current_dl_handle; + st->current_dl_handle = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.dl_handle; - cell_compiled_fn fn = (cell_compiled_fn)dlsym(handle, name); - if (!fn) - return JS_RaiseDisrupt(ctx, "native function %s not found in dylib", name); +#define RETURN_DISPATCH(v) \ + do { \ + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); \ + st->current_dl_handle = prev_dl_handle; \ + return (v); \ + } while (0) - /* Allocate GC-managed frame: slot 0 = this, slots 1..argc = args */ - JSValue *fp = cell_rt_enter_frame(ctx, 512); - if (!fp) return JS_EXCEPTION; - fp[0] = this_val; - for (int i = 0; i < argc && i < 510; i++) + /* Root func_obj across allocation — GC can move it */ + JSGCRef func_ref; + JS_PushGCRef(ctx, &func_ref); + func_ref.val = func_obj; + + /* Allocate initial frame */ + JSValue *fp = cell_rt_enter_frame(ctx, nr_slots); + if (!fp) { + JS_PopGCRef(ctx, &func_ref); + RETURN_DISPATCH(JS_EXCEPTION); + } + + /* Re-derive func_obj after potential GC */ + func_obj = func_ref.val; + JS_PopGCRef(ctx, &func_ref); + + /* Set up frame: this in slot 0, args in slots 1..N */ + fp[0] = this_obj; + int copy = (argc < arity) ? 
argc : arity; + if (copy < 0) copy = argc; /* variadic: copy all */ + for (int i = 0; i < copy && i < nr_slots - 1; i++) fp[1 + i] = argv[i]; - /* Store the magic ID (registry index) so cell_rt_get/put_closure - can re-derive the enclosing frame from the GC ref at call time, - surviving GC moves */ - fp[QBE_FRAME_OUTER_SLOT] = (JSValue)(int64_t)magic; + /* Link function to frame for closure access */ + JSFrameRegister *frame = (JSFrameRegister *)((char *)fp - offsetof(JSFrameRegister, slots)); + frame->function = func_obj; - /* Set g_current_dl_handle so any closures created during this call - (e.g. inner functions returned by factory functions) are registered - against the correct dylib */ - void *prev_handle = g_current_dl_handle; - g_current_dl_handle = handle; + int base_depth = st->aot_depth; /* remember entry depth for return detection */ - /* At top-level (depth 1 = this is the outermost native call), - save the fn count so we can reclaim temporary closures after */ - int saved_fn_count = (g_aot_depth == 1) ? g_native_fn_count : -1; + for (;;) { + st->pending_callee_frame = 0; + st->pending_is_tail = 0; + if (atomic_load_explicit(&ctx->pause_flag, memory_order_relaxed) >= 1) + atomic_store_explicit(&ctx->pause_flag, 0, memory_order_relaxed); - JSValue result = fn(ctx, fp); - cell_rt_leave_frame(ctx); - g_current_dl_handle = prev_handle; + /* Keep closure creation bound to the currently executing native module. */ + if (JS_IsFunction(frame->function)) { + JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); + if (cur_fn->kind == JS_FUNC_KIND_NATIVE) + st->current_dl_handle = JS_VALUE_GET_CODE(cur_fn->u.cell.code)->u.native.dl_handle; + } - /* Reclaim temporary closures created during this top-level call */ - if (saved_fn_count >= 0) - reclaim_native_fns(ctx, saved_fn_count); + JSValue result = fn(ctx, fp); - if (result == JS_EXCEPTION) { - /* Ensure there is a pending exception. QBE @_exc_ret returns 15 - but may not have set one (e.g. 
if cell_rt_enter_frame failed). */ - if (!JS_HasException(ctx)) - ctx->current_exception = JS_NULL; + /* Re-derive frame after potential GC */ + if (st->aot_depth <= 0) { + fprintf(stderr, "[BUG] native dispatch lost frame depth after fn call\n"); + abort(); + } + JSValue frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + + if (st->pending_callee_frame != 0) { + /* Function signaled a call — dispatch it */ + JSValue callee_frame_val = st->pending_callee_frame; + st->pending_callee_frame = 0; + int pending_is_tail = st->pending_is_tail; + st->pending_is_tail = 0; + JSGCRef callee_ref; + JS_PushGCRef(ctx, &callee_ref); + callee_ref.val = callee_frame_val; + JSFrameRegister *callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + int callee_argc = JS_VALUE_GET_INT(callee_fr->address); + if (callee_argc < 0) + callee_argc = 0; + JSValue callee_fn_val = callee_fr->function; + + if (!JS_IsFunction(callee_fn_val)) { + JS_RaiseDisrupt(ctx, "not a function"); + /* Resume caller with exception pending */ + JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_fn->u.cell.code)->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_ref); + continue; + } + + JSGCRef callee_fn_ref; + JS_PushGCRef(ctx, &callee_fn_ref); + callee_fn_ref.val = callee_fn_val; + JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(callee_fn_ref.val); + + if (callee_fn->kind == JS_FUNC_KIND_NATIVE) { + /* Native-to-native call — no C stack growth */ + cell_compiled_fn callee_ptr = (cell_compiled_fn)JS_VALUE_GET_CODE(callee_fn->u.cell.code)->u.native.fn_ptr; + + if (pending_is_tail) { + /* Tail call: replace current frame with the prepared callee frame. 
*/ + JSValue saved_caller = frame->caller; + + /* Pop old frame */ + cell_rt_leave_frame(ctx); + + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + callee_fn_val = callee_fn_ref.val; + callee_fr->function = callee_fn_val; + callee_fr->caller = saved_caller; + callee_fr->address = JS_NewInt32(ctx, 0); + + if (!cell_rt_push_existing_frame(ctx, callee_ref.val)) { + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(JS_EXCEPTION); + } + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(st, st->aot_depth - 1)->val); + fp = (JSValue *)frame->slots; + fn = callee_ptr; + } else { + /* Regular call: link caller and push prepared callee frame. */ + int ret_info = JS_VALUE_GET_INT(frame->address); + int resume_seg = ret_info >> 16; + int ret_slot = ret_info & 0xFFFF; + + /* Save return address in caller */ + frame->address = JS_NewInt32(ctx, (resume_seg << 16) | ret_slot); + + callee_fr = (JSFrameRegister *)JS_VALUE_GET_PTR(callee_ref.val); + callee_fn_val = callee_fn_ref.val; + callee_fr->function = callee_fn_val; + callee_fr->caller = JS_MKPTR(frame); + callee_fr->address = JS_NewInt32(ctx, 0); + + if (!cell_rt_push_existing_frame(ctx, callee_ref.val)) { + /* Resume caller with exception pending */ + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_fn->u.cell.code)->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); + continue; + } + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(aot_gc_ref_at(st, st->aot_depth - 1)->val); + fp = (JSValue *)frame->slots; + fn = callee_ptr; + } + } else { + /* Non-native callee (C function, register VM, etc.) 
— + call it via the standard path and store the result */ + JSValue ret; + if (callee_fn->kind == JS_FUNC_KIND_C) + ret = js_call_c_function(ctx, callee_fn_val, callee_fr->slots[0], + callee_argc, &callee_fr->slots[1]); + else + ret = JS_CallInternal(ctx, callee_fn_val, callee_fr->slots[0], + callee_argc, &callee_fr->slots[1], 0); + + /* Re-derive frame after call */ + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + + if (JS_IsException(ret)) { + /* Non-native callee threw — resume caller with exception pending. + Tag the pending return slot with JS_EXCEPTION so generated code + can branch without an extra JS_HasException C call. */ + if (!JS_HasException(ctx)) + JS_Disrupt(ctx); + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = JS_EXCEPTION; + /* fn and fp still point to the calling native function's frame. + Just resume it — it will detect JS_EXCEPTION in the return slot. 
*/ + JSFunction *exc_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_fn->u.cell.code)->u.native.fn_ptr; + JS_PopGCRef(ctx, &callee_ref); + continue; + } + /* Clear stale exception */ + if (JS_HasException(ctx)) + JS_GetException(ctx); + + if (pending_is_tail) { + /* Tail call to non-native: return its result up the chain */ + /* Pop current frame and return to caller */ + if (st->aot_depth <= base_depth) { + cell_rt_leave_frame(ctx); + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(ret); + } + /* Pop current frame, return to caller frame */ + cell_rt_leave_frame(ctx); + if (st->aot_depth < base_depth) { + JS_PopGCRef(ctx, &callee_ref); + RETURN_DISPATCH(ret); + } + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = ret; + /* Resume caller */ + JSFunction *caller_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(caller_fn->u.cell.code)->u.native.fn_ptr; + } else { + /* Regular call: store result and resume current function */ + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = ret; + /* fn stays the same — we resume the same function at next segment */ + JSFunction *cur_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(cur_fn->u.cell.code)->u.native.fn_ptr; + } + } + JS_PopGCRef(ctx, &callee_fn_ref); + JS_PopGCRef(ctx, &callee_ref); + continue; + } + + /* No pending call — function returned a value or exception */ + if (result == JS_EXCEPTION) { + /* Exception: pop this frame and propagate to caller. + The caller's generated code has exception checks at resume points. 
*/ + if (!JS_HasException(ctx)) + JS_Disrupt(ctx); + + if (st->aot_depth <= base_depth) { + cell_rt_leave_frame(ctx); + RETURN_DISPATCH(JS_EXCEPTION); + } + cell_rt_leave_frame(ctx); + if (st->aot_depth < base_depth) { + RETURN_DISPATCH(JS_EXCEPTION); + } + + /* Resume caller and tag the return slot with JS_EXCEPTION. */ + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = JS_EXCEPTION; + + JSFunction *exc_caller_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(exc_caller_fn->u.cell.code)->u.native.fn_ptr; + continue; + } + + /* Normal return — pop frame and store result in caller */ + if (st->aot_depth <= base_depth) { + cell_rt_leave_frame(ctx); + RETURN_DISPATCH(result); + } + cell_rt_leave_frame(ctx); + if (st->aot_depth < base_depth) { + RETURN_DISPATCH(result); + } + + /* Return to caller frame */ + frame_val = aot_gc_ref_at(st, st->aot_depth - 1)->val; + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); + fp = (JSValue *)frame->slots; + int ret_info = JS_VALUE_GET_INT(frame->address); + int ret_slot = ret_info & 0xFFFF; + if (ret_slot != 0xFFFF) + fp[ret_slot] = result; + + JSFunction *caller_fn = JS_VALUE_GET_FUNCTION(frame->function); + fn = (cell_compiled_fn)JS_VALUE_GET_CODE(caller_fn->u.cell.code)->u.native.fn_ptr; + continue; + } + +#undef RETURN_DISPATCH +} + +static JSValue aot_get_or_create_native_code(JSContext *ctx, NativeRTState *st, + void *dl_handle, int64_t fn_idx, + int arity, uint16_t nr_slots) { + for (int i = 0; i < st->code_cache_count; i++) { + AOTCodeCacheEntry *e = &st->code_cache[i]; + if (e->dl_handle == dl_handle && e->fn_idx == fn_idx) + return e->code; + } + + char name[64]; + snprintf(name, sizeof(name), "cell_fn_%lld", (long long)fn_idx); + void *fn_ptr = 
dlsym(dl_handle, name); + if (!fn_ptr) + return JS_RaiseDisrupt(ctx, "native function %s not found in dylib", name); + + JSCode *code = ct_alloc(ctx, sizeof(JSCode), 8); + if (!code) return JS_EXCEPTION; + memset(code, 0, sizeof(*code)); + code->header = objhdr_make(0, OBJ_CODE, 0, 0, 0, 0); + code->kind = JS_CODE_KIND_NATIVE; + code->arity = (int16_t)arity; + code->u.native.fn_ptr = fn_ptr; + code->u.native.dl_handle = dl_handle; + code->u.native.nr_slots = nr_slots; + JSValue code_obj = JS_MKPTR(code); + + if (st->code_cache_count >= st->code_cache_cap) { + int new_cap = st->code_cache_cap ? (st->code_cache_cap * 2) : 128; + AOTCodeCacheEntry *new_cache = + (AOTCodeCacheEntry *)realloc(st->code_cache, (size_t)new_cap * sizeof(*new_cache)); + if (!new_cache) + return JS_RaiseOOM(ctx); + st->code_cache = new_cache; + st->code_cache_cap = new_cap; } - return result; + + st->code_cache[st->code_cache_count].dl_handle = dl_handle; + st->code_cache[st->code_cache_count].fn_idx = fn_idx; + st->code_cache[st->code_cache_count].code = code_obj; + st->code_cache_count++; + return code_obj; } +/* Create a native function object from a compiled fn_idx. + Called from QBE-generated code during function creation. 
*/ JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp, - int64_t nr_args) { + int64_t nr_args, int64_t nr_slots) { (void)outer_fp; - if (g_native_fn_count >= MAX_NATIVE_FN) - return JS_RaiseDisrupt(ctx, "too many native functions (max %d)", MAX_NATIVE_FN); + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + if (!st->current_dl_handle) + return JS_RaiseDisrupt(ctx, "no native module loaded"); - int global_id = g_native_fn_count++; - g_native_fn_registry[global_id].dl_handle = g_current_dl_handle; - g_native_fn_registry[global_id].fn_idx = (int)fn_idx; + JSValue code_obj = aot_get_or_create_native_code( + ctx, st, st->current_dl_handle, fn_idx, (int)nr_args, (uint16_t)nr_slots); + if (JS_IsException(code_obj)) + return JS_EXCEPTION; - /* Create independent GC ref so the enclosing frame survives - even after cell_rt_leave_frame pops the stack ref */ - if (g_aot_depth > 0) { - JSGCRef *ref = &g_native_fn_registry[global_id].frame_ref; - JS_AddGCRef(ctx, ref); - ref->val = g_aot_gc_refs[g_aot_depth - 1].val; - g_native_fn_registry[global_id].has_frame_ref = 1; - } else { - g_native_fn_registry[global_id].has_frame_ref = 0; - } + /* Get the current frame as outer_frame for closures */ + JSValue outer_frame = JS_NULL; + if (st->aot_depth > 0) + outer_frame = aot_gc_ref_at(st, st->aot_depth - 1)->val; - return JS_NewCFunction2(ctx, (JSCFunction *)cell_fn_trampoline, "native_fn", - (int)nr_args, JS_CFUNC_generic_magic, global_id); + return js_new_native_function_with_code(ctx, code_obj, (int)nr_args, outer_frame); } -/* --- Frame-based function calling --- */ +/* --- Frame-based function calling --- + Still used by QBE-generated code for building call frames + before signaling the dispatch loop. 
*/ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) { if (!JS_IsFunction(fn)) { @@ -566,9 +1136,13 @@ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) { return JS_EXCEPTION; } int nr_slots = (int)nargs + 2; + JSFunction *f = JS_VALUE_GET_FUNCTION(fn); + if (f->kind == JS_FUNC_KIND_NATIVE && JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots > nr_slots) + nr_slots = JS_VALUE_GET_CODE(f->u.cell.code)->u.native.nr_slots; JSFrameRegister *new_frame = alloc_frame_register(ctx, nr_slots); if (!new_frame) return JS_EXCEPTION; new_frame->function = fn; + new_frame->address = JS_NewInt32(ctx, (int)nargs); return JS_MKPTR(new_frame); } @@ -578,11 +1152,12 @@ void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) { fr->slots[idx] = val; } +/* cell_rt_invoke — still used for non-dispatch-loop paths (e.g. old code) */ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) { if (frame_val == JS_EXCEPTION) return JS_EXCEPTION; JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val); - int nr_slots = (int)objhdr_cap56(fr->header); - int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0; + int c_argc = JS_VALUE_GET_INT(fr->address); + if (c_argc < 0) c_argc = 0; JSValue fn_val = fr->function; if (!JS_IsFunction(fn_val)) { @@ -594,11 +1169,10 @@ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) { JSValue result; if (fn->kind == JS_FUNC_KIND_C) { - /* Match MACH_INVOKE: C functions go directly to js_call_c_function, - bypassing JS_Call's arity check. Extra args are silently available. */ result = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); + } else if (fn->kind == JS_FUNC_KIND_NATIVE) { + result = cell_native_dispatch(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]); } else { - /* Register/bytecode functions — use JS_CallInternal (no arity gate) */ JSValue args[c_argc > 0 ? 
c_argc : 1]; for (int i = 0; i < c_argc; i++) args[i] = fr->slots[i + 1]; @@ -607,9 +1181,6 @@ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) { if (JS_IsException(result)) return JS_EXCEPTION; - /* Clear any stale exception left by functions that returned a valid - value despite internal error (e.g., sign("text") returns null - but JS_ToFloat64 leaves an exception flag) */ if (JS_HasException(ctx)) JS_GetException(ctx); return result; @@ -643,14 +1214,27 @@ JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key) { return JS_NewBool(ctx, ret >= 0); } -JSValue cell_rt_delete_str(JSContext *ctx, JSValue obj, const char *name) { - JSValue key = JS_NewString(ctx, name); +static JSValue cell_rt_delete_key(JSContext *ctx, JSValue obj, JSValue key) { int ret = JS_DeleteProperty(ctx, obj, key); if (ret < 0) return JS_EXCEPTION; return JS_NewBool(ctx, ret >= 0); } +JSValue cell_rt_delete_str(JSContext *ctx, JSValue obj, const char *name) { + JSValue key = aot_key_from_cstr(ctx, name); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_delete_key(ctx, obj, key); +} + +JSValue cell_rt_delete_lit(JSContext *ctx, JSValue obj, int64_t lit_idx) { + JSValue key = aot_lit_from_index(ctx, lit_idx); + if (JS_IsException(key)) + return JS_EXCEPTION; + return cell_rt_delete_key(ctx, obj, key); +} + /* --- Typeof --- */ JSValue cell_rt_typeof(JSContext *ctx, JSValue val) { @@ -765,6 +1349,9 @@ void cell_rt_clear_exception(JSContext *ctx) { /* --- Disruption --- */ +/* Disrupt: silently set exception flag like the bytecode VM does. + Does NOT call JS_ThrowTypeError — that would print to stderr + even when a disruption handler will catch it. */ void cell_rt_disrupt(JSContext *ctx) { JS_RaiseDisrupt(ctx, "type error in native code"); } @@ -790,70 +1377,109 @@ JSValue cell_rt_regexp(JSContext *ctx, const char *pattern, const char *flags) { /* --- Module entry point --- Loads a native .cm module from a dylib handle. 
- Looks up cell_main, builds a heap-allocated frame, sets - g_current_dl_handle so closures register in the right module. */ + Looks up cell_main, builds a heap-allocated frame, and + records active module handle in per-actor native state. */ -JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env) { - cell_compiled_fn fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main"); - if (!fn) - return JS_RaiseDisrupt(ctx, "cell_main not found in native module dylib"); +/* Helper: run a native module's entry point through the dispatch loop. + Creates a temporary JS_FUNC_KIND_NATIVE function so that the full + dispatch loop (tail calls, closures, etc.) works for module-level code. */ +static JSValue native_module_run(JSContext *ctx, void *dl_handle, + cell_compiled_fn entry, int nr_slots) { + NativeRTState *st = native_state(ctx); + if (!st) return JS_EXCEPTION; + void *prev_handle = st->current_dl_handle; + st->current_dl_handle = dl_handle; - /* Set current handle so cell_rt_make_function registers closures - against this module's dylib */ - void *prev_handle = g_current_dl_handle; - g_current_dl_handle = dl_handle; - - /* Make env available for cell_rt_get_intrinsic lookups */ - cell_rt_set_native_env(ctx, env); - - /* GC-managed frame for module execution */ - JSValue *fp = cell_rt_enter_frame(ctx, 512); - if (!fp) { - g_current_dl_handle = prev_handle; - return JS_RaiseDisrupt(ctx, "frame allocation failed"); + /* Create a native function object for the entry point */ + JSValue func_obj = js_new_native_function(ctx, (void *)entry, dl_handle, + (uint16_t)nr_slots, 0, JS_NULL); + if (JS_IsException(func_obj)) { + st->current_dl_handle = prev_handle; + return JS_EXCEPTION; } /* Clear any stale exception left by a previous interpreted run */ if (JS_HasException(ctx)) JS_GetException(ctx); - JSValue result = fn(ctx, fp); - cell_rt_leave_frame(ctx); /* safe — closures have independent GC refs */ - g_current_dl_handle = prev_handle; - if (result == 
JS_EXCEPTION) - return JS_EXCEPTION; + JSValue result = cell_native_dispatch(ctx, func_obj, JS_NULL, 0, NULL); + st->current_dl_handle = prev_handle; return result; } +JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env) { + cell_compiled_fn fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main"); + if (!fn) + return JS_RaiseDisrupt(ctx, "cell_main not found in native module dylib"); + + /* Make env available for cell_rt_get_intrinsic lookups */ + cell_rt_set_native_env(ctx, env); + + /* Try to read nr_slots from the module (exported by emitter) */ + int *slots_ptr = (int *)dlsym(dl_handle, "cell_main_nr_slots"); + int nr_slots = slots_ptr ? *slots_ptr : 512; + + return native_module_run(ctx, dl_handle, fn, nr_slots); +} + /* Load a native module from a dylib handle, trying a named symbol first. Falls back to cell_main if the named symbol is not found. */ JSValue cell_rt_native_module_load_named(JSContext *ctx, void *dl_handle, const char *sym_name, JSValue env) { cell_compiled_fn fn = NULL; - if (sym_name) + const char *used_name = NULL; + if (sym_name) { fn = (cell_compiled_fn)dlsym(dl_handle, sym_name); - if (!fn) + if (fn) used_name = sym_name; + } + if (!fn) { fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main"); + used_name = "cell_main"; + } if (!fn) return JS_RaiseDisrupt(ctx, "symbol not found in native module dylib"); - void *prev_handle = g_current_dl_handle; - g_current_dl_handle = dl_handle; - /* Make env available for cell_rt_get_intrinsic lookups */ cell_rt_set_native_env(ctx, env); - JSValue *fp = cell_rt_enter_frame(ctx, 512); - if (!fp) { - g_current_dl_handle = prev_handle; - return JS_RaiseDisrupt(ctx, "frame allocation failed"); + /* Try to read nr_slots from the module */ + char slots_sym[128]; + snprintf(slots_sym, sizeof(slots_sym), "%s_nr_slots", used_name); + int *slots_ptr = (int *)dlsym(dl_handle, slots_sym); + int nr_slots = slots_ptr ? 
*slots_ptr : 512; + + return native_module_run(ctx, dl_handle, fn, nr_slots); +} + +void cell_rt_free_native_state(JSContext *ctx) { + NativeRTState *st = (NativeRTState *)ctx->native_state; + if (!st) return; + + aot_clear_lit_pool(ctx, st); + + if (st->has_native_env && st->native_env_ref_inited) { + JS_DeleteGCRef(ctx, &st->native_env_ref); + st->native_env_ref_inited = 0; + st->native_env_ref.val = JS_NULL; } - JSValue result = fn(ctx, fp); - cell_rt_leave_frame(ctx); /* safe — closures have independent GC refs */ - g_current_dl_handle = prev_handle; - if (result == JS_EXCEPTION) - return JS_EXCEPTION; - return result; + for (int ci = 0; ci < st->gc_ref_chunk_count; ci++) { + AOTGCRefChunk *chunk = st->gc_ref_chunks[ci]; + if (!chunk) continue; + for (int si = 0; si < AOT_GC_REF_CHUNK_SIZE; si++) { + if (chunk->inited[si]) { + JS_DeleteGCRef(ctx, &chunk->refs[si]); + chunk->inited[si] = 0; + chunk->refs[si].val = JS_NULL; + } + } + free(chunk); + } + + free(st->gc_ref_chunks); + free(st->key_cache); + free(st->code_cache); + js_free_rt(st); + ctx->native_state = NULL; } /* Backward-compat: uses RTLD_DEFAULT (works when dylib opened with RTLD_GLOBAL) */ diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index aa3ea257..db5546d6 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -120,8 +120,8 @@ typedef struct JSBlob JSBlob; typedef struct JSText JSText; typedef struct JSRecord JSRecord; typedef struct JSFunction JSFunction; -typedef struct JSFrame JSFrame; typedef struct JSCode JSCode; +typedef struct JSFrame JSFrame; #define OBJHDR_CAP_SHIFT 8u #define OBJHDR_CAP_MASK (((objhdr_t)1ull << 56) - 1ull) @@ -278,7 +278,6 @@ typedef void (*JSLogCallback)(JSContext *ctx, const char *channel, const char *m /* Forward declaration for bytecode freeing */ #define JS_VALUE_GET_BLOB(v) ((JSBlob *)JS_VALUE_GET_PTR (v)) -#define JS_VALUE_GET_CODE(v) (JS_VALUE_GET_PTR (v)) #ifdef HEAP_CHECK void heap_check_fail(void *ptr, struct JSContext 
*ctx); @@ -286,6 +285,7 @@ void heap_check_fail(void *ptr, struct JSContext *ctx); #define JS_VALUE_GET_OBJ(v) ((JSRecord *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_TEXT(v) ((JSText *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_FUNCTION(v) ((JSFunction *)heap_check_chase(ctx, v)) +#define JS_VALUE_GET_CODE(v) ((JSCode *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_FRAME(v) ((JSFrame *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_STRING(v) ((JSText *)heap_check_chase(ctx, v)) #define JS_VALUE_GET_RECORD(v) ((JSRecord *)heap_check_chase(ctx, v)) @@ -294,6 +294,7 @@ void heap_check_fail(void *ptr, struct JSContext *ctx); #define JS_VALUE_GET_OBJ(v) ((JSRecord *)chase (v)) #define JS_VALUE_GET_TEXT(v) ((JSText *)chase (v)) #define JS_VALUE_GET_FUNCTION(v) ((JSFunction *)chase (v)) +#define JS_VALUE_GET_CODE(v) ((JSCode *)chase (v)) #define JS_VALUE_GET_FRAME(v) ((JSFrame *)chase (v)) #define JS_VALUE_GET_STRING(v) ((JSText *)chase (v)) #define JS_VALUE_GET_RECORD(v) ((JSRecord *)chase (v)) @@ -485,6 +486,17 @@ typedef enum MachOpcode { MACH_MOD, /* R(A) = R(B) % R(C) */ MACH_POW, /* R(A) = R(B) ** R(C) */ MACH_NEG, /* R(A) = -R(B) */ + MACH_REMAINDER, /* R(A) = remainder(R(B), R(C)) */ + MACH_MAX, /* R(A) = max(R(B), R(C)) */ + MACH_MIN, /* R(A) = min(R(B), R(C)) */ + MACH_ABS, /* R(A) = abs(R(B)) */ + MACH_SIGN, /* R(A) = sign(R(B)) */ + MACH_FRACTION, /* R(A) = fraction(R(B)) */ + MACH_INTEGER, /* R(A) = integer(R(B)) */ + MACH_FLOOR, /* R(A) = floor(R(B), R(C)) */ + MACH_CEILING, /* R(A) = ceiling(R(B), R(C)) */ + MACH_ROUND, /* R(A) = round(R(B), R(C)) */ + MACH_TRUNC, /* R(A) = trunc(R(B), R(C)) */ MACH__DEAD_INC, /* reserved — was MACH_INC, never emitted */ MACH__DEAD_DEC, /* reserved — was MACH_DEC, never emitted */ @@ -658,6 +670,17 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = { [MACH_MOD] = "mod", [MACH_POW] = "pow", [MACH_NEG] = "neg", + [MACH_REMAINDER] = "remainder", + [MACH_MAX] = "max", + [MACH_MIN] = "min", + [MACH_ABS] = "abs", 
+ [MACH_SIGN] = "sign", + [MACH_FRACTION] = "fraction", + [MACH_INTEGER] = "integer", + [MACH_FLOOR] = "floor", + [MACH_CEILING] = "ceiling", + [MACH_ROUND] = "round", + [MACH_TRUNC] = "trunc", [MACH__DEAD_INC] = "dead_inc", [MACH__DEAD_DEC] = "dead_dec", [MACH_EQ] = "eq", @@ -1098,6 +1121,7 @@ struct JSContext { JSGCRef *last_gc_ref; /* used to reference temporary GC roots (list) */ JSLocalRef *top_local_ref; /* for JS_LOCAL macro - GC updates C locals through pointers */ CCallRoot *c_call_root; /* stack of auto-rooted C call argv arrays */ + void *native_state; /* qbe_helpers.c per-actor native runtime state */ int class_count; /* size of class_array and class_proto */ JSClass *class_array; @@ -1316,8 +1340,30 @@ typedef enum { JS_FUNC_KIND_BYTECODE, JS_FUNC_KIND_C_DATA, JS_FUNC_KIND_REGISTER, /* register-based VM function */ + JS_FUNC_KIND_NATIVE, /* QBE-compiled native function */ } JSFunctionKind; +typedef enum { + JS_CODE_KIND_REGISTER = 1, + JS_CODE_KIND_NATIVE = 2, +} JSCodeKind; + +typedef struct JSCode { + objhdr_t header; /* OBJ_CODE */ + uint8_t kind; + int16_t arity; + union { + struct { + JSCodeRegister *code; + } reg; + struct { + void *fn_ptr; /* compiled cell_fn_N pointer */ + void *dl_handle; /* dylib handle for dlsym lookups */ + uint16_t nr_slots; /* frame size for this function */ + } native; + } u; +} JSCode; + typedef struct JSFunction { objhdr_t header; /* must come first */ JSValue name; /* function name as JSValue text */ @@ -1330,10 +1376,10 @@ typedef struct JSFunction { int16_t magic; } cfunc; struct { - JSCodeRegister *code; /* compiled register code (off-heap) */ + JSValue code; /* JSCode object (OBJ_CODE) */ JSValue env_record; /* stone record, module environment */ JSValue outer_frame; /* JSFrame JSValue, for closures */ - } reg; + } cell; } u; } JSFunction; @@ -1356,6 +1402,7 @@ typedef struct JSFunction { JSValue js_call_c_function (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv); JSValue 
JS_CallInternal (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv, int flags); JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame); +JSValue cell_native_dispatch(JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv); int JS_DeleteProperty (JSContext *ctx, JSValue obj, JSValue prop); JSValue __attribute__ ((format (printf, 2, 3))) JS_RaiseDisrupt (JSContext *ctx, const char *fmt, ...); @@ -1394,8 +1441,6 @@ static JSValue js_cell_splat (JSContext *ctx, JSValue this_val, int argc, JSValu static JSValue js_cell_meme (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_fn_apply (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_call (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_modulo (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_neg (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_not (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); JSValue js_cell_text_lower (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); JSValue js_cell_text_upper (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); @@ -1406,17 +1451,6 @@ static JSValue js_cell_text_search (JSContext *ctx, JSValue this_val, int argc, static JSValue js_cell_text_extract (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); JSValue js_cell_character (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_number (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_abs (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_sign (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_floor (JSContext *ctx, JSValue this_val, int argc, 
JSValue *argv); -static JSValue js_cell_number_ceiling (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_round (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_trunc (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_whole (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_fraction (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_min (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_max (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); -static JSValue js_cell_number_remainder (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_object (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_cell_text_format (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); static JSValue js_print (JSContext *ctx, JSValue this_val, int argc, JSValue *argv); @@ -1529,6 +1563,8 @@ static inline void set_value (JSContext *ctx, JSValue *pval, JSValue new_val) { *pval = new_val; } +int cell_rt_native_active(JSContext *ctx); + static inline __exception int js_poll_interrupts (JSContext *ctx) { if (unlikely (atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2)) { JS_RaiseDisrupt (ctx, "interrupted"); @@ -1623,7 +1659,10 @@ JSValue js_key_from_string (JSContext *ctx, JSValue val); /* mach.c exports */ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame); +JSValue js_new_native_function(JSContext *ctx, void *fn_ptr, void *dl_handle, uint16_t nr_slots, int arity, JSValue outer_frame); +JSValue js_new_native_function_with_code(JSContext *ctx, JSValue code_obj, int arity, JSValue outer_frame); JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count); +void 
cell_rt_free_native_state(JSContext *ctx); #endif /* QUICKJS_INTERNAL_H */ diff --git a/source/runtime.c b/source/runtime.c index 06235eec..9bc46d5d 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -53,8 +53,8 @@ void heap_check_fail(void *ptr, JSContext *ctx) { JSFunction *fn = (JSFunction *)JS_VALUE_GET_PTR(frame->function); const char *name = NULL, *file = NULL; uint16_t line = 0; - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - JSCodeRegister *code = fn->u.reg.code; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + JSCodeRegister *code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; file = code->filename_cstr; name = code->name_cstr; if (!first) @@ -1394,12 +1394,14 @@ void gc_scan_object (JSContext *ctx, void *ptr, uint8_t *from_base, uint8_t *fro JSFunction *fn = (JSFunction *)ptr; /* Scan the function name */ fn->name = gc_copy_value (ctx, fn->name, from_base, from_end, to_base, to_free, to_end); - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { /* Scan code tree to arbitrary nesting depth */ - gc_scan_code_tree (ctx, fn->u.reg.code, from_base, from_end, to_base, to_free, to_end); + gc_scan_code_tree (ctx, JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code, from_base, from_end, to_base, to_free, to_end); /* Scan outer_frame and env_record */ - fn->u.reg.outer_frame = gc_copy_value (ctx, fn->u.reg.outer_frame, from_base, from_end, to_base, to_free, to_end); - fn->u.reg.env_record = gc_copy_value (ctx, fn->u.reg.env_record, from_base, from_end, to_base, to_free, to_end); + fn->u.cell.outer_frame = gc_copy_value (ctx, fn->u.cell.outer_frame, from_base, from_end, to_base, to_free, to_end); + fn->u.cell.env_record = gc_copy_value (ctx, fn->u.cell.env_record, from_base, from_end, to_base, to_free, to_end); + } else if (fn->kind == JS_FUNC_KIND_NATIVE) { + fn->u.cell.outer_frame = gc_copy_value (ctx, 
fn->u.cell.outer_frame, from_base, from_end, to_base, to_free, to_end); } break; } @@ -1432,10 +1434,10 @@ void gc_scan_object (JSContext *ctx, void *ptr, uint8_t *from_base, uint8_t *fro objhdr_t fh = *(objhdr_t *)JS_VALUE_GET_PTR (frame->function); if (objhdr_type (fh) == OBJ_FUNCTION) { JSFunction *fn = (JSFunction *)JS_VALUE_GET_PTR (frame->function); - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - if (fn->u.reg.code->name_cstr) fname = fn->u.reg.code->name_cstr; - if (fn->u.reg.code->filename_cstr) ffile = fn->u.reg.code->filename_cstr; - fnslots = fn->u.reg.code->nr_slots; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + if (JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->name_cstr) fname = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->name_cstr; + if (JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->filename_cstr) ffile = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->filename_cstr; + fnslots = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code->nr_slots; } } } @@ -1541,8 +1543,8 @@ int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size) { } if (objhdr_type (fnh) == OBJ_FUNCTION) { JSFunction *fnp = (JSFunction *)JS_VALUE_GET_PTR (fn_v); - if (fnp->kind == JS_FUNC_KIND_REGISTER && fnp->u.reg.code && fnp->u.reg.code->name_cstr) - fn_name = fnp->u.reg.code->name_cstr; + if (fnp->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fnp->u.cell.code)->u.reg.code && JS_VALUE_GET_CODE(fnp->u.cell.code)->u.reg.code->name_cstr) + fn_name = JS_VALUE_GET_CODE(fnp->u.cell.code)->u.reg.code->name_cstr; } } fprintf (stderr, "VALIDATE_GC: pre-gc frame %p slot[%llu] -> %p (chased %p) bad type %d (hdr=0x%llx) fn=%s\n", @@ -2100,6 +2102,7 @@ void JS_FreeContext (JSContext *ctx) { JSRuntime *rt = ctx->rt; int i; + cell_rt_free_native_state(ctx); JS_DeleteGCRef(ctx, &ctx->suspended_frame_ref); for (i = 0; i < ctx->class_count; i++) { @@ -4738,8 +4741,10 @@ JSValue JS_CallInternal (JSContext *ctx, JSValue func_obj, 
JSValue this_obj, case JS_FUNC_KIND_C_DATA: return js_call_c_function (ctx, func_obj, this_obj, argc, argv); case JS_FUNC_KIND_REGISTER: - return JS_CallRegisterVM (ctx, f->u.reg.code, this_obj, argc, argv, - f->u.reg.env_record, f->u.reg.outer_frame); + return JS_CallRegisterVM (ctx, JS_VALUE_GET_CODE(f->u.cell.code)->u.reg.code, this_obj, argc, argv, + f->u.cell.env_record, f->u.cell.outer_frame); + case JS_FUNC_KIND_NATIVE: + return cell_native_dispatch (ctx, func_obj, this_obj, argc, argv); default: return JS_RaiseDisrupt (ctx, "not a function"); } @@ -4759,8 +4764,10 @@ JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, J case JS_FUNC_KIND_C: return js_call_c_function (ctx, func_obj, this_obj, argc, argv); case JS_FUNC_KIND_REGISTER: - return JS_CallRegisterVM (ctx, f->u.reg.code, this_obj, argc, argv, - f->u.reg.env_record, f->u.reg.outer_frame); + return JS_CallRegisterVM (ctx, JS_VALUE_GET_CODE(f->u.cell.code)->u.reg.code, this_obj, argc, argv, + f->u.cell.env_record, f->u.cell.outer_frame); + case JS_FUNC_KIND_NATIVE: + return cell_native_dispatch (ctx, func_obj, this_obj, argc, argv); default: return JS_RaiseDisrupt (ctx, "not a function"); } @@ -5326,6 +5333,10 @@ JSValue js_regexp_toString (JSContext *ctx, JSValue this_val, int argc, JSValue int lre_check_timeout (void *opaque) { JSContext *ctx = opaque; + if (cell_rt_native_active (ctx)) { + atomic_store_explicit (&ctx->pause_flag, 0, memory_order_relaxed); + return 0; + } return atomic_load_explicit (&ctx->pause_flag, memory_order_relaxed) >= 2; } @@ -10488,15 +10499,46 @@ JSValue JS_CellCall (JSContext *ctx, JSValue fn, JSValue this_val, JSValue args) return js_cell_call (ctx, JS_NULL, argc, argv); } +static int js_cell_read_number_strict (JSValue val, double *out) { + uint32_t tag = JS_VALUE_GET_TAG (val); + if (tag == JS_TAG_INT) { + *out = (double)JS_VALUE_GET_INT (val); + return 0; + } + if (JS_TAG_IS_FLOAT64 (tag)) { + *out = JS_VALUE_GET_FLOAT64 (val); + return 0; + 
} + return -1; +} + +static JSValue js_cell_number_from_double (JSContext *ctx, double d) { + if (d >= INT32_MIN && d <= INT32_MAX) { + int32_t i = (int32_t)d; + if ((double)i == d) + return JS_NewInt32 (ctx, i); + } + return JS_NewFloat64 (ctx, d); +} + /* C API: modulo(a, b) - modulo operation */ JSValue JS_CellModulo (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_modulo (ctx, JS_NULL, 2, argv); + double dividend, divisor; + if (js_cell_read_number_strict (a, &dividend) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &divisor) < 0) return JS_NULL; + if (isnan (dividend) || isnan (divisor)) return JS_NULL; + if (divisor == 0.0) return JS_NULL; + if (dividend == 0.0) return JS_NewFloat64 (ctx, 0.0); + return js_cell_number_from_double (ctx, + dividend - (divisor * floor (dividend / divisor))); } /* C API: neg(val) - negate number */ JSValue JS_CellNeg (JSContext *ctx, JSValue val) { - return js_cell_neg (ctx, JS_NULL, 1, &val); + double d; + if (js_cell_read_number_strict (val, &d) < 0) return JS_NULL; + if (isnan (d)) return JS_NULL; + return js_cell_number_from_double (ctx, -d); } /* C API: not(val) - logical not */ @@ -10621,60 +10663,86 @@ JSValue JS_CellNumber (JSContext *ctx, JSValue val) { /* C API: abs(num) - absolute value */ JSValue JS_CellAbs (JSContext *ctx, JSValue num) { - return js_cell_number_abs (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, fabs (d)); } /* C API: sign(num) - sign of number (-1, 0, 1) */ JSValue JS_CellSign (JSContext *ctx, JSValue num) { - return js_cell_number_sign (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + if (d < 0) return JS_NewInt32 (ctx, -1); + if (d > 0) return JS_NewInt32 (ctx, 1); + return JS_NewInt32 (ctx, 0); } /* C API: floor(num) - floor */ JSValue JS_CellFloor (JSContext *ctx, JSValue num) { - return
js_cell_number_floor (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, floor (d)); } /* C API: ceiling(num) - ceiling */ JSValue JS_CellCeiling (JSContext *ctx, JSValue num) { - return js_cell_number_ceiling (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, ceil (d)); } /* C API: round(num) - round to nearest integer */ JSValue JS_CellRound (JSContext *ctx, JSValue num) { - return js_cell_number_round (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, round (d)); } /* C API: trunc(num) - truncate towards zero */ JSValue JS_CellTrunc (JSContext *ctx, JSValue num) { - return js_cell_number_trunc (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, trunc (d)); } /* C API: whole(num) - integer part */ JSValue JS_CellWhole (JSContext *ctx, JSValue num) { - return js_cell_number_whole (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, trunc (d)); } /* C API: fraction(num) - fractional part */ JSValue JS_CellFraction (JSContext *ctx, JSValue num) { - return js_cell_number_fraction (ctx, JS_NULL, 1, &num); + double d; + if (js_cell_read_number_strict (num, &d) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, d - trunc (d)); } /* C API: min(a, b) - minimum of two numbers */ JSValue JS_CellMin (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_number_min (ctx, JS_NULL, 2, argv); + double da, db; + if (js_cell_read_number_strict (a, &da) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &db) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, da < db ? 
da : db); } /* C API: max(a, b) - maximum of two numbers */ JSValue JS_CellMax (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_number_max (ctx, JS_NULL, 2, argv); + double da, db; + if (js_cell_read_number_strict (a, &da) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &db) < 0) return JS_NULL; + return js_cell_number_from_double (ctx, da > db ? da : db); } /* C API: remainder(a, b) - remainder after division */ JSValue JS_CellRemainder (JSContext *ctx, JSValue a, JSValue b) { - JSValue argv[2] = { a, b }; - return js_cell_number_remainder (ctx, JS_NULL, 2, argv); + double dividend, divisor; + if (js_cell_read_number_strict (a, &dividend) < 0) return JS_NULL; + if (js_cell_read_number_strict (b, &divisor) < 0) return JS_NULL; + if (divisor == 0.0) return JS_NULL; + return js_cell_number_from_double (ctx, + dividend - (trunc (dividend / divisor) * divisor)); } /* Object functions */ @@ -11348,7 +11416,7 @@ static void JS_AddIntrinsicBaseObjects (JSContext *ctx) { js_set_global_cfunc(ctx, "filter", js_cell_array_filter, 2); js_set_global_cfunc(ctx, "sort", js_cell_array_sort, 2); - /* Number utility functions */ + /* Number intrinsics: direct calls lower to mcode; globals remain for first-class use. */ js_set_global_cfunc(ctx, "whole", js_cell_number_whole, 1); js_set_global_cfunc(ctx, "fraction", js_cell_number_fraction, 1); js_set_global_cfunc(ctx, "floor", js_cell_number_floor, 2); @@ -12684,8 +12752,8 @@ JSValue JS_GetStack(JSContext *ctx) { if (!JS_IsFunction(frame->function)) break; JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function); - if (fn->kind == JS_FUNC_KIND_REGISTER && fn->u.reg.code) { - JSCodeRegister *code = fn->u.reg.code; + if (fn->kind == JS_FUNC_KIND_REGISTER && JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code) { + JSCodeRegister *code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code; uint32_t pc = is_first ?
cur_pc : (uint32_t)(JS_VALUE_GET_INT(frame->address) >> 16); frames[count].fn = code->name_cstr; frames[count].file = code->filename_cstr; diff --git a/streamline.cm b/streamline.cm index 545a1862..9fe9adb5 100644 --- a/streamline.cm +++ b/streamline.cm @@ -37,7 +37,8 @@ var streamline = function(ir, log) { var numeric_ops = { add: true, subtract: true, multiply: true, - divide: true, modulo: true, pow: true + divide: true, modulo: true, remainder: true, + max: true, min: true, pow: true } var bool_result_ops = { eq_int: true, ne_int: true, lt_int: true, gt_int: true, @@ -229,7 +230,12 @@ var streamline = function(ir, log) { add: [2, T_NUM, 3, T_NUM], subtract: [2, T_NUM, 3, T_NUM], multiply: [2, T_NUM, 3, T_NUM], divide: [2, T_NUM, 3, T_NUM], modulo: [2, T_NUM, 3, T_NUM], - pow: [2, T_NUM, 3, T_NUM], negate: [2, T_NUM], + remainder: [2, T_NUM, 3, T_NUM], max: [2, T_NUM, 3, T_NUM], + min: [2, T_NUM, 3, T_NUM], pow: [2, T_NUM, 3, T_NUM], + negate: [2, T_NUM], abs: [2, T_NUM], sign: [2, T_NUM], + fraction: [2, T_NUM], integer: [2, T_NUM], + floor: [2, T_NUM], ceiling: [2, T_NUM], + round: [2, T_NUM], trunc: [2, T_NUM], bitand: [2, T_INT, 3, T_INT], bitor: [2, T_INT, 3, T_INT], bitxor: [2, T_INT, 3, T_INT], shl: [2, T_INT, 3, T_INT], shr: [2, T_INT, 3, T_INT], ushr: [2, T_INT, 3, T_INT], @@ -250,8 +256,13 @@ var streamline = function(ir, log) { var param_types = null var i = 0 var j = 0 + var iter = 0 var instr = null var bt = null + var src = 0 + var dst = 0 + var old_bt = null + var changed = false var rule = null if (instructions == null || nr_args == 0) { @@ -275,6 +286,31 @@ var streamline = function(ir, log) { i = i + 1 } + // Propagate typed constraints backward through move chains. 
+ changed = true + iter = 0 + while (changed && iter < num_instr + 4) { + changed = false + i = 0 + while (i < num_instr) { + instr = instructions[i] + if (is_array(instr) && instr[0] == "move") { + dst = instr[1] + src = instr[2] + bt = backward_types[dst] + if (bt != null && bt != T_UNKNOWN) { + old_bt = backward_types[src] + merge_backward(backward_types, src, bt) + if (backward_types[src] != old_bt) { + changed = true + } + } + } + i = i + 1 + } + iter = iter + 1 + } + param_types = array(func.nr_slots) j = 1 while (j <= nr_args) { @@ -302,10 +338,14 @@ var streamline = function(ir, log) { bitnot: [1, T_INT], bitand: [1, T_INT], bitor: [1, T_INT], bitxor: [1, T_INT], shl: [1, T_INT], shr: [1, T_INT], ushr: [1, T_INT], negate: [1, T_NUM], concat: [1, T_TEXT], + abs: [1, T_NUM], sign: [1, T_INT], fraction: [1, T_NUM], + integer: [1, T_NUM], floor: [1, T_NUM], ceiling: [1, T_NUM], + round: [1, T_NUM], trunc: [1, T_NUM], eq: [1, T_BOOL], ne: [1, T_BOOL], lt: [1, T_BOOL], le: [1, T_BOOL], gt: [1, T_BOOL], ge: [1, T_BOOL], in: [1, T_BOOL], add: [1, T_NUM], subtract: [1, T_NUM], multiply: [1, T_NUM], - divide: [1, T_NUM], modulo: [1, T_NUM], pow: [1, T_NUM], + divide: [1, T_NUM], modulo: [1, T_NUM], remainder: [1, T_NUM], + max: [1, T_NUM], min: [1, T_NUM], pow: [1, T_NUM], move: [1, T_UNKNOWN], load_field: [1, T_UNKNOWN], load_index: [1, T_UNKNOWN], load_dynamic: [1, T_UNKNOWN], pop: [1, T_UNKNOWN], get: [1, T_UNKNOWN], @@ -325,16 +365,35 @@ var streamline = function(ir, log) { is_record: [1, T_BOOL], is_stone: [1, T_BOOL] } - var infer_slot_write_types = function(func) { + // Known intrinsic return types for invoke result inference. 
+ var intrinsic_return_types = { + abs: T_NUM, floor: T_NUM, ceiling: T_NUM, + round: T_NUM, trunc: T_NUM, fraction: T_NUM, + integer: T_NUM, whole: T_NUM, sign: T_NUM, + max: T_NUM, min: T_NUM, remainder: T_NUM, modulo: T_NUM + } + + var infer_slot_write_types = function(func, param_types) { var instructions = func.instructions var nr_args = func.nr_args != null ? func.nr_args : 0 var num_instr = 0 var write_types = null + var frame_callee = null + var intrinsic_slots = null + var move_dests = null + var move_srcs = null var i = 0 var k = 0 + var iter = 0 var instr = null + var op = null + var src = 0 var slot = 0 + var old_typ = null + var src_typ = null var typ = null + var callee_slot = null + var changed = false var rule = null var cw_keys = null @@ -344,11 +403,62 @@ var streamline = function(ir, log) { num_instr = length(instructions) write_types = array(func.nr_slots) + frame_callee = array(func.nr_slots) + intrinsic_slots = array(func.nr_slots) + move_dests = [] + move_srcs = [] i = 0 while (i < num_instr) { instr = instructions[i] if (is_array(instr)) { - rule = write_rules[instr[0]] + op = instr[0] + if (op == "access") { + slot = instr[1] + if (slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, access_value_type(instr[2])) + } + if (is_object(instr[2]) && instr[2].make == "intrinsic") { + typ = intrinsic_return_types[instr[2].name] + if (typ != null && slot >= 0 && slot < length(intrinsic_slots)) { + intrinsic_slots[slot] = typ + } + } + i = i + 1 + continue + } + if (op == "move") { + slot = instr[1] + if (slot > 0 && slot > nr_args) { + move_dests[] = slot + move_srcs[] = instr[2] + } + i = i + 1 + continue + } + if (op == "frame" || op == "goframe") { + if (is_number(instr[1]) && instr[1] >= 0 && instr[1] < length(frame_callee)) { + frame_callee[instr[1]] = instr[2] + } + i = i + 1 + continue + } + if (op == "invoke" || op == "tail_invoke") { + slot = instr[2] + typ = T_UNKNOWN + callee_slot = frame_callee[instr[1]] + if 
(is_number(callee_slot) && callee_slot >= 0 && callee_slot < length(intrinsic_slots)) { + if (intrinsic_slots[callee_slot] != null) { + typ = intrinsic_slots[callee_slot] + } + } + if (slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, typ) + } + i = i + 1 + continue + } + + rule = write_rules[op] if (rule != null) { slot = instr[rule[0]] typ = rule[1] @@ -363,6 +473,54 @@ var streamline = function(ir, log) { i = i + 1 } + // Resolve move writes from known source invariants (fixed-point). + changed = true + iter = 0 + while (changed && iter < length(write_types) + 4) { + changed = false + k = 0 + while (k < length(move_dests)) { + slot = move_dests[k] + src = move_srcs[k] + src_typ = null + if (is_number(src) && src >= 0) { + if (src < length(write_types) && write_types[src] != null) { + src_typ = write_types[src] + } else if (param_types != null && src < length(param_types) && param_types[src] != null) { + src_typ = param_types[src] + } + } + if (src_typ != null) { + old_typ = write_types[slot] + merge_backward(write_types, slot, src_typ) + if (write_types[slot] != old_typ) { + changed = true + } + } + k = k + 1 + } + iter = iter + 1 + } + + // Any remaining unresolved move write can carry arbitrary type. 
+ k = 0 + while (k < length(move_dests)) { + slot = move_dests[k] + src = move_srcs[k] + src_typ = null + if (is_number(src) && src >= 0) { + if (src < length(write_types) && write_types[src] != null) { + src_typ = write_types[src] + } else if (param_types != null && src < length(param_types) && param_types[src] != null) { + src_typ = param_types[src] + } + } + if (src_typ == null && slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, T_UNKNOWN) + } + k = k + 1 + } + // Closure-written slots can have any type at runtime — mark unknown if (func.closure_written != null) { cw_keys = array(func.closure_written) @@ -976,6 +1134,94 @@ var streamline = function(ir, log) { return null } + // ========================================================= + // Pass: eliminate_unreachable_cfg — nop blocks not reachable + // from function entry under explicit jump control-flow. + // ========================================================= + var eliminate_unreachable_cfg = function(func) { + var instructions = func.instructions + var num_instr = 0 + var disruption_pc = -1 + var label_map = null + var reachable = null + var stack = null + var sp = 0 + var idx = 0 + var tgt = null + var instr = null + var op = null + var nc = 0 + + if (instructions == null || length(instructions) == 0) { + return null + } + + num_instr = length(instructions) + if (is_number(func.disruption_pc)) { + disruption_pc = func.disruption_pc + } + label_map = {} + idx = 0 + while (idx < num_instr) { + instr = instructions[idx] + if (is_text(instr) && !starts_with(instr, "_nop_")) { + label_map[instr] = idx + } + idx = idx + 1 + } + + reachable = array(num_instr, false) + stack = [0] + if (disruption_pc > 0 && disruption_pc < num_instr) { + stack[] = disruption_pc + } + + sp = 0 + while (sp < length(stack)) { + idx = stack[sp] + sp = sp + 1 + + if (idx < 0 || idx >= num_instr || reachable[idx]) { + continue + } + reachable[idx] = true + instr = instructions[idx] + + if (!is_array(instr)) { + 
stack[] = idx + 1 + continue + } + + op = instr[0] + if (op == "jump") { + tgt = label_map[instr[1]] + if (is_number(tgt)) stack[] = tgt + continue + } + if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") { + tgt = label_map[instr[2]] + if (is_number(tgt)) stack[] = tgt + stack[] = idx + 1 + continue + } + if (op == "return" || op == "disrupt") { + continue + } + stack[] = idx + 1 + } + + idx = 0 + while (idx < num_instr) { + if (!reachable[idx] && is_array(instructions[idx])) { + nc = nc + 1 + instructions[idx] = "_nop_ucfg_" + text(nc) + } + idx = idx + 1 + } + + return null + } + // ========================================================= // Pass: eliminate_dead_jumps — jump to next label → nop // ========================================================= @@ -1590,51 +1836,75 @@ var streamline = function(ir, log) { var param_types = null var write_types = null var slot_types = null + var run_cycle = function(suffix) { + var name = null + name = "infer_param_types" + suffix + run_pass(func, name, function() { + param_types = infer_param_types(func) + return param_types + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "infer_slot_write_types" + suffix + run_pass(func, name, function() { + write_types = infer_slot_write_types(func, param_types) + return write_types + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_type_checks" + suffix + run_pass(func, name, function() { + slot_types = eliminate_type_checks(func, param_types, write_types, log) + return slot_types + }) + if (verify_fn) verify_fn(func, "after " + name) + + if (log != null && log.type_deltas != null && slot_types != null) { + log.type_deltas[] = { + fn: func.name, + cycle: suffix == "" ? 
1 : 2, + param_types: param_types, + slot_types: slot_types + } + } + + name = "simplify_algebra" + suffix + run_pass(func, name, function() { + return simplify_algebra(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "simplify_booleans" + suffix + run_pass(func, name, function() { + return simplify_booleans(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_moves" + suffix + run_pass(func, name, function() { + return eliminate_moves(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_unreachable" + suffix + run_pass(func, name, function() { + return eliminate_unreachable(func) + }) + if (verify_fn) verify_fn(func, "after " + name) + + name = "eliminate_dead_jumps" + suffix + run_pass(func, name, function() { + return eliminate_dead_jumps(func, log) + }) + if (verify_fn) verify_fn(func, "after " + name) + return null + } + if (func.instructions == null || length(func.instructions) == 0) { return null } - run_pass(func, "infer_param_types", function() { - param_types = infer_param_types(func) - return param_types - }) - if (verify_fn) verify_fn(func, "after infer_param_types") - run_pass(func, "infer_slot_write_types", function() { - write_types = infer_slot_write_types(func) - return write_types - }) - if (verify_fn) verify_fn(func, "after infer_slot_write_types") - run_pass(func, "eliminate_type_checks", function() { - slot_types = eliminate_type_checks(func, param_types, write_types, log) - return slot_types - }) - if (verify_fn) verify_fn(func, "after eliminate_type_checks") - if (log != null && log.type_deltas != null && slot_types != null) { - log.type_deltas[] = { - fn: func.name, - param_types: param_types, - slot_types: slot_types - } - } - run_pass(func, "simplify_algebra", function() { - return simplify_algebra(func, log) - }) - if (verify_fn) verify_fn(func, "after simplify_algebra") - run_pass(func, "simplify_booleans", function() { - return 
simplify_booleans(func, log) - }) - if (verify_fn) verify_fn(func, "after simplify_booleans") - run_pass(func, "eliminate_moves", function() { - return eliminate_moves(func, log) - }) - if (verify_fn) verify_fn(func, "after eliminate_moves") - run_pass(func, "eliminate_unreachable", function() { - return eliminate_unreachable(func) - }) - if (verify_fn) verify_fn(func, "after eliminate_unreachable") - run_pass(func, "eliminate_dead_jumps", function() { - return eliminate_dead_jumps(func, log) - }) - if (verify_fn) verify_fn(func, "after eliminate_dead_jumps") + + run_cycle("") return null } diff --git a/vm_suite.ce b/vm_suite.ce index f7ba705e..9220a308 100644 --- a/vm_suite.ce +++ b/vm_suite.ce @@ -827,6 +827,27 @@ run("disruption handler accesses object from outer scope", function() { if (obj.y != 20) fail("handler mutation lost, y=" + text(obj.y)) }) +run("disruption in callback with multiple calls after", function() { + // Regression: a function with a disruption handler that calls a + // callback which disrupts, followed by more successful calls. + // In native mode, cell_rt_disrupt must NOT use JS_ThrowTypeError + // (which prints to stderr) — it must silently set the exception. + var log = [] + var run_inner = function(name, fn) { + fn() + log[] = "pass:" + name + } disruption { + log[] = "fail:" + name + } + run_inner("a", function() { var x = 1 }) + run_inner("b", function() { disrupt }) + run_inner("c", function() { var y = 2 }) + if (length(log) != 3) fail("expected 3 log entries, got " + text(length(log))) + if (log[0] != "pass:a") fail("expected pass:a, got " + log[0]) + if (log[1] != "fail:b") fail("expected fail:b, got " + log[1]) + if (log[2] != "pass:c") fail("expected pass:c, got " + log[2]) +}) + // ============================================================================ // TYPE CHECKING WITH is_* FUNCTIONS // ============================================================================