diff --git a/docs/spec/mach.md b/docs/spec/mach.md index 479a84cd..0419d209 100644 --- a/docs/spec/mach.md +++ b/docs/spec/mach.md @@ -154,3 +154,12 @@ struct JSCodeRegister { ``` The constant pool holds all non-immediate values referenced by `LOADK` instructions: strings, large numbers, and other constants. + +### Constant Pool Index Overflow + +Named property instructions (`LOAD_FIELD`, `STORE_FIELD`, `DELETE`) use the iABC format where the constant pool key index occupies an 8-bit field (max 255). When a function references more than 256 unique property names, the serializer automatically falls back to a two-instruction sequence: + +1. `LOADK tmp, key_index` — load the key string into a temporary register (iABx, 16-bit index) +2. `LOAD_DYNAMIC` / `STORE_DYNAMIC` / `DELETEINDEX` — use the register-based variant + +This is transparent to the mcode compiler and streamline optimizer. diff --git a/docs/spec/mcode.md b/docs/spec/mcode.md index 29cfb488..cccd5485 100644 --- a/docs/spec/mcode.md +++ b/docs/spec/mcode.md @@ -10,11 +10,44 @@ Mcode is a JSON-based intermediate representation that can be interpreted direct ## Pipeline ``` -Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Interpret +Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Mach VM (default) + → Mcode Interpreter → QBE → Native ``` -Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result can be interpreted by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview. +Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result is serialized to binary bytecode by the Mach compiler (`mach.c`), interpreted directly by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview. + +### Function Proxy Decomposition + +When the compiler encounters a method call `obj.method(args)`, it emits a branching pattern to handle ƿit's function proxy protocol. An arity-2 function used as a proxy target receives the method name and argument array instead of a normal method call: + +```json +["is_proxy", check, obj] +["jump_false", check, "record_path"] + +// Proxy path: call obj(name, [args...]) with this=null +["access", name_slot, "method"] +["array", args_arr, N, arg0, arg1, ...] +["null", null_slot] +["frame", f, obj, 2] +["setarg", f, 0, null_slot] +["setarg", f, 1, name_slot] +["setarg", f, 2, args_arr] +["invoke", f, dest] +["jump", "done"] + +["LABEL", "record_path"] +["load_field", method, obj, "method"] +["frame", f2, method, N] +["setarg", f2, 0, obj] +["setarg", f2, 1, arg0] +... +["invoke", f2, dest] + +["LABEL", "done"] +``` + +The streamline optimizer can eliminate the dead branch when the type of `obj` is statically known. ## JSMCode Structure diff --git a/docs/spec/pipeline.md b/docs/spec/pipeline.md index c8fda47d..2a64ee15 100644 --- a/docs/spec/pipeline.md +++ b/docs/spec/pipeline.md @@ -5,11 +5,11 @@ description: "Overview of the compilation stages and optimizations" ## Overview -The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental). +The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. All backends share the same path through mcode and streamline. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental). ``` -Source → Tokenize → Parse → Fold → Mach VM (default) - → Mcode → Streamline → Mcode Interpreter +Source → Tokenize → Parse → Fold → Mcode → Streamline → Mach VM (default) + → Mcode Interpreter → QBE → Native ``` @@ -78,12 +78,18 @@ Provides operation implementations as QBE IL templates. Each arithmetic, compari ### Mach VM (default) -Binary 32-bit register VM. Used for production execution and bootstrapping. +Binary 32-bit register VM. The Mach serializer (`mach.c`) converts streamlined mcode JSON into compact 32-bit bytecode with a constant pool. Used for production execution and bootstrapping. ``` ./cell script.ce ``` +Debug the mach bytecode output: + +``` +./cell --core . --dump-mach script.ce +``` + ### Mcode Interpreter JSON-based interpreter. Used for debugging the compilation pipeline. diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm index 0f8ef0cb..d2c422dd 100644 --- a/internal/bootstrap.cm +++ b/internal/bootstrap.cm @@ -1,5 +1,5 @@ // Hidden vars come from env: -// CLI mode (cell_init): os, args, core_path, shop_path, emit_qbe +// CLI mode (cell_init): os, args, core_path, shop_path, emit_qbe, dump_mach // Actor spawn (script_startup): os, json, nota, wota, actorsym, init, core_path, shop_path // args[0] = script name, args[1..] = user args var load_internal = os.load_internal @@ -165,6 +165,10 @@ function run_ast(name, ast, env) { print(qbe_il) return null } + if (dump_mach) { + mach_dump_mcode(name, json.encode(optimized), env) + return null + } return mach_eval_mcode(name, json.encode(optimized), env) } diff --git a/internal/bootstrap.mach b/internal/bootstrap.mach index 7bd67663..b32b8414 100644 Binary files a/internal/bootstrap.mach and b/internal/bootstrap.mach differ diff --git a/internal/engine.mach b/internal/engine.mach index a4b2dbbb..6fb9bd86 100644 Binary files a/internal/engine.mach and b/internal/engine.mach differ diff --git a/internal/shop.cm b/internal/shop.cm index 3c84d551..aba4a917 100644 --- a/internal/shop.cm +++ b/internal/shop.cm @@ -423,28 +423,46 @@ function inject_env(inject) { return env } +// Lazy-loaded compiler modules for on-the-fly compilation +var _mcode_mod = null +var _streamline_mod = null + // Compile a module and return its bytecode blob. // The bytecode is cached on disk by content hash. function resolve_mod_fn(path, pkg) { if (!fd.is_file(path)) { print(`path ${path} is not a file`); disrupt } var content = text(fd.slurp(path)) - - // Check cache for pre-compiled .mach blob var cached = pull_from_cache(stone(blob(content))) var ast = null - var ast_json = null var compiled = null + var mach_path = null + var mach_blob = null + var ir = null + var optimized = null + + // Check cache for pre-compiled .mach blob if (cached) { return cached } - // Compile via new pipeline - ast = analyze(content, path) - ast_json = shop_json.encode(ast) + // Check for pre-compiled .mach file alongside .cm source + if (ends_with(path, '.cm')) { + mach_path = text(path, 0, length(path) - 3) + '.mach' + if (fd.is_file(mach_path)) { + mach_blob = fd.slurp(mach_path) + put_into_cache(stone(blob(content)), mach_blob) + return mach_blob + } + } - // Cache compiled binary - compiled = mach_compile_ast(path, ast_json) + // Compile via full pipeline: analyze → mcode → streamline → serialize + if (!_mcode_mod) _mcode_mod = Shop.use("mcode", null) + if (!_streamline_mod) _streamline_mod = Shop.use("streamline", null) + ast = analyze(content, path) + ir = _mcode_mod(ast) + optimized = _streamline_mod(ir) + compiled = mach_compile_mcode_bin(path, shop_json.encode(optimized)) put_into_cache(stone(blob(content)), compiled) return compiled diff --git a/mcode.cm b/mcode.cm index 8439006d..3f0058de 100644 --- a/mcode.cm +++ b/mcode.cm @@ -43,6 +43,8 @@ var mcode = function(ast) { var s_func_counter = 0 var s_loop_break = null var s_loop_continue = null + var s_label_map = {} + var s_pending_label = null var s_is_arrow = false var s_function_nr = 0 var s_scopes = null @@ -71,6 +73,7 @@ var mcode = function(ast) { max_slot: s_max_slot, loop_break: s_loop_break, loop_continue: s_loop_continue, + label_map: s_label_map, is_arrow: s_is_arrow, function_nr: s_function_nr, intrinsic_cache: s_intrinsic_cache, @@ -90,6 +93,7 @@ var mcode = function(ast) { s_max_slot = saved.max_slot s_loop_break = saved.loop_break s_loop_continue = saved.loop_continue + s_label_map = saved.label_map s_is_arrow = saved.is_arrow s_function_nr = saved.function_nr s_intrinsic_cache = saved.intrinsic_cache @@ -761,37 +765,114 @@ var mcode = function(ast) { } var emit_call_method = function(dest, obj, prop, args) { + var argc = length(args) + var check = alloc_slot() + var record_path = gen_label("record_path") + var done_label = gen_label("call_done") + var _i = 0 + var arg_idx = 0 + + // Check if obj is a proxy function (arity 2) + emit_2("is_proxy", check, obj) + emit_jump_cond("jump_false", check, record_path) + + // Function proxy path: call obj(prop_name, [args...]) with this=null + var null_slot = alloc_slot() + emit_const_null(null_slot) + var name_str = alloc_slot() + emit_const_str(name_str, prop) + var args_arr = alloc_slot() + var arr_instr = ["array", args_arr, argc] + _i = 0 + while (_i < argc) { + push(arr_instr, args[_i]) + _i = _i + 1 + } + add_instr(arr_instr) + var pf = alloc_slot() + emit_3("frame", pf, obj, 2) + emit_3("setarg", pf, 0, null_slot) + emit_3("setarg", pf, 1, name_str) + emit_3("setarg", pf, 2, args_arr) + emit_2("invoke", pf, dest) + emit_jump(done_label) + + // Record path: load method, call with this=obj + emit_label(record_path) var method_slot = alloc_slot() add_instr(["load_field", method_slot, obj, prop]) - var argc = length(args) var frame_slot = alloc_slot() emit_3("frame", frame_slot, method_slot, argc) emit_3("setarg", frame_slot, 0, obj) - var arg_idx = 1 - var _i = 0 + arg_idx = 1 + _i = 0 while (_i < argc) { emit_3("setarg", frame_slot, arg_idx, args[_i]) arg_idx = arg_idx + 1 _i = _i + 1 } emit_2("invoke", frame_slot, dest) + + emit_label(done_label) } var emit_call_method_dyn = function(dest, obj, key_reg, args) { + var argc = length(args) + var check = alloc_slot() + var record_path = gen_label("dyn_record_path") + var done_label = gen_label("dyn_call_done") + var _i = 0 + var arg_idx = 0 + + // Check if obj is a proxy function (arity 2) + emit_2("is_proxy", check, obj) + emit_jump_cond("jump_false", check, record_path) + + // Function proxy path (dynamic key): must be text + var key_ok = alloc_slot() + var error_path = gen_label("dyn_error") + emit_2("is_text", key_ok, key_reg) + emit_jump_cond("jump_false", key_ok, error_path) + var null_slot = alloc_slot() + emit_const_null(null_slot) + var args_arr = alloc_slot() + var arr_instr = ["array", args_arr, argc] + _i = 0 + while (_i < argc) { + push(arr_instr, args[_i]) + _i = _i + 1 + } + add_instr(arr_instr) + var pf = alloc_slot() + emit_3("frame", pf, obj, 2) + emit_3("setarg", pf, 0, null_slot) + emit_3("setarg", pf, 1, key_reg) + emit_3("setarg", pf, 2, args_arr) + emit_2("invoke", pf, dest) + emit_jump(done_label) + + // Error path: non-text key on function disrupts + emit_label(error_path) + emit_0("disrupt") + emit_jump(done_label) + + // Record path: load method dynamically, call with this=obj + emit_label(record_path) var method_slot = alloc_slot() emit_3("load_dynamic", method_slot, obj, key_reg) - var argc = length(args) var frame_slot = alloc_slot() emit_3("frame", frame_slot, method_slot, argc) emit_3("setarg", frame_slot, 0, obj) - var arg_idx = 1 - var _i = 0 + arg_idx = 1 + _i = 0 while (_i < argc) { emit_3("setarg", frame_slot, arg_idx, args[_i]) arg_idx = arg_idx + 1 _i = _i + 1 } emit_2("invoke", frame_slot, dest) + + emit_label(done_label) } var emit_go_call = function(func_slot, args) { @@ -1859,6 +1940,13 @@ var mcode = function(ast) { return null } + if (kind == "label") { + s_pending_label = stmt.name + gen_statement(stmt.statement) + s_pending_label = null + return null + } + if (kind == "while") { cond = stmt.expression stmts = stmt.statements @@ -1868,6 +1956,10 @@ var mcode = function(ast) { old_continue = s_loop_continue s_loop_break = end_label s_loop_continue = start_label + if (s_pending_label != null) { + s_label_map[s_pending_label] = {break_target: end_label, continue_target: start_label} + s_pending_label = null + } emit_label(start_label) cond_slot = gen_expr(cond, -1) emit_jump_cond("jump_false", cond_slot, end_label) @@ -1893,6 +1985,10 @@ var mcode = function(ast) { old_continue = s_loop_continue s_loop_break = end_label s_loop_continue = cond_label + if (s_pending_label != null) { + s_label_map[s_pending_label] = {break_target: end_label, continue_target: cond_label} + s_pending_label = null + } emit_label(start_label) _i = 0 while (_i < length(stmts)) { @@ -1920,6 +2016,10 @@ var mcode = function(ast) { old_continue = s_loop_continue s_loop_break = end_label s_loop_continue = update_label + if (s_pending_label != null) { + s_label_map[s_pending_label] = {break_target: end_label, continue_target: update_label} + s_pending_label = null + } if (init != null) { init_kind = init.kind if (init_kind == "var" || init_kind == "def") { @@ -1995,14 +2095,18 @@ var mcode = function(ast) { } if (kind == "break") { - if (s_loop_break != null) { + if (stmt.name != null && s_label_map[stmt.name] != null) { + emit_jump(s_label_map[stmt.name].break_target) + } else if (s_loop_break != null) { emit_jump(s_loop_break) } return null } if (kind == "continue") { - if (s_loop_continue != null) { + if (stmt.name != null && s_label_map[stmt.name] != null) { + emit_jump(s_label_map[stmt.name].continue_target) + } else if (s_loop_continue != null) { emit_jump(s_loop_continue) } return null @@ -2128,6 +2232,7 @@ var mcode = function(ast) { s_intrinsic_cache = [] s_loop_break = null s_loop_continue = null + s_label_map = {} s_is_arrow = is_arrow @@ -2321,6 +2426,7 @@ var mcode = function(ast) { s_func_counter = 0 s_loop_break = null s_loop_continue = null + s_label_map = {} s_function_nr = 0 // Scan scope diff --git a/mcode.mach b/mcode.mach index 71ef2a6b..78ee12c6 100644 Binary files a/mcode.mach and b/mcode.mach differ diff --git a/qbe_emit.mach b/qbe_emit.mach index 6b6ee4db..a95d3b2d 100644 Binary files a/qbe_emit.mach and b/qbe_emit.mach differ diff --git a/source/cell.c b/source/cell.c index 1c14d76b..bbabfdaf 100644 --- a/source/cell.c +++ b/source/cell.c @@ -275,6 +275,7 @@ static void print_usage(const char *prog) printf(" --core Set core path directly (overrides CELL_CORE)\n"); printf(" --shop Set shop path (overrides CELL_SHOP)\n"); printf(" --emit-qbe Emit QBE IL (for native compilation)\n"); + printf(" --dump-mach Dump MACH bytecode disassembly\n"); printf(" --test [heap_size] Run C test suite\n"); printf(" -h, --help Show this help message\n"); printf("\nEnvironment:\n"); @@ -307,6 +308,7 @@ int cell_init(int argc, char **argv) /* Default: run script through bootstrap pipeline */ int emit_qbe = 0; + int dump_mach = 0; int arg_start = 1; const char *shop_override = NULL; const char *core_override = NULL; @@ -319,6 +321,9 @@ int cell_init(int argc, char **argv) } else if (strcmp(argv[arg_start], "--emit-qbe") == 0) { emit_qbe = 1; arg_start++; + } else if (strcmp(argv[arg_start], "--dump-mach") == 0) { + dump_mach = 1; + arg_start++; } else if (strcmp(argv[arg_start], "--shop") == 0) { if (arg_start + 1 >= argc) { printf("ERROR: --shop requires a path argument\n"); @@ -398,6 +403,7 @@ int cell_init(int argc, char **argv) JS_SetPropertyStr(ctx, hidden_env, "shop_path", shop_path ? JS_NewString(ctx, shop_path) : JS_NULL); JS_SetPropertyStr(ctx, hidden_env, "emit_qbe", JS_NewBool(ctx, emit_qbe)); + JS_SetPropertyStr(ctx, hidden_env, "dump_mach", JS_NewBool(ctx, dump_mach)); JS_SetPropertyStr(ctx, hidden_env, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val)); JS_SetPropertyStr(ctx, hidden_env, "json", js_json_use(ctx)); JS_SetPropertyStr(ctx, hidden_env, "nota", js_nota_use(ctx)); diff --git a/source/mach.c b/source/mach.c index 679ef71f..11b0c449 100644 --- a/source/mach.c +++ b/source/mach.c @@ -1142,174 +1142,6 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, break; } - case MACH_CALL: { - /* Lua-style call: R(A)=func, B=nargs in R(A+1)..R(A+B), C=nresults */ - int base = a; - int nargs = b; - int nresults = c; - JSValue func_val = frame->slots[base]; - - if (!JS_IsFunction(func_val)) { - JS_ThrowTypeError(ctx, "not a function"); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - goto disrupt; - } - - JSFunction *fn = JS_VALUE_GET_FUNCTION(func_val); - if (fn->kind == JS_FUNC_KIND_C) { - /* C function: copy args to C stack */ - JSValue args[nargs > 0 ? nargs : 1]; - for (int i = 0; i < nargs; i++) - args[i] = frame->slots[base + 1 + i]; - ctx->reg_current_frame = frame_ref.val; - ctx->current_register_pc = pc > 0 ? pc - 1 : 0; - JSValue ret = js_call_c_function(ctx, func_val, JS_NULL, nargs, args); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - ctx->reg_current_frame = JS_NULL; - if (JS_IsException(ret)) { goto disrupt; } - if (nresults > 0) frame->slots[base] = ret; - } else if (fn->kind == JS_FUNC_KIND_REGISTER) { - /* Register function: allocate frame, copy args, switch */ - JSCodeRegister *fn_code = fn->u.reg.code; - JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots); - if (!new_frame) { - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - goto disrupt; - } - /* Re-read pointers — GC may have moved them */ - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - func_val = frame->slots[base]; - fn = JS_VALUE_GET_FUNCTION(func_val); - new_frame->function = func_val; - new_frame->slots[0] = JS_NULL; /* this */ - for (int i = 0; i < nargs && i < fn_code->arity; i++) - new_frame->slots[1 + i] = frame->slots[base + 1 + i]; - - /* Save return info: pc in upper 16 bits, base reg or 0xFFFF (discard) in lower */ - int ret_slot = (nresults > 0) ? base : 0xFFFF; - frame->address = JS_NewInt32(ctx, (pc << 16) | ret_slot); - new_frame->caller = JS_MKPTR(frame); - - frame = new_frame; - frame_ref.val = JS_MKPTR(frame); - code = fn_code; - env = fn->u.reg.env_record; - pc = code->entry_point; - } else { - /* Other function kinds (bytecode) — copy args to C stack */ - JSValue args[nargs > 0 ? nargs : 1]; - for (int i = 0; i < nargs; i++) - args[i] = frame->slots[base + 1 + i]; - JSValue ret = JS_CallInternal(ctx, func_val, JS_NULL, nargs, args, 0); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - if (JS_IsException(ret)) { goto disrupt; } - if (nresults > 0) frame->slots[base] = ret; - } - break; - } - - case MACH_CALLMETHOD: { - /* Method call: R(A)=obj, B=nargs in R(A+2)..R(A+1+B), C=cpool key index - Result stored in R(A). C=0xFF means key is in R(A+1). - If obj is a function (proxy): call obj(key_str, [args...]) - Else (record): get property, call property(obj_as_this, args...) */ - int base = a; - int nargs = b; - JSGCRef key_ref; - JS_PushGCRef(ctx, &key_ref); - key_ref.val = (c == 0xFF) ? frame->slots[base + 1] : code->cpool[c]; - - if (JS_IsFunction(frame->slots[base]) && JS_IsText(key_ref.val) && - JS_VALUE_GET_FUNCTION(frame->slots[base])->length == 2) { - /* Proxy call (arity-2 functions only): obj(name, [args...]) */ - JSValue arr = JS_NewArray(ctx); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - if (JS_IsException(arr)) { JS_PopGCRef(ctx, &key_ref); goto disrupt; } - frame->slots[base + 1] = arr; /* protect from GC in temp slot */ - for (int i = 0; i < nargs; i++) { - JS_SetPropertyNumber(ctx, frame->slots[base + 1], i, frame->slots[base + 2 + i]); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - } - /* Call proxy with key and array from C stack */ - JSValue call_args[2] = { key_ref.val, frame->slots[base + 1] }; - ctx->reg_current_frame = frame_ref.val; - ctx->current_register_pc = pc > 0 ? pc - 1 : 0; - JSValue ret = JS_CallInternal(ctx, frame->slots[base], JS_NULL, 2, call_args, 0); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - ctx->reg_current_frame = JS_NULL; - if (JS_IsException(ret)) { JS_PopGCRef(ctx, &key_ref); goto disrupt; } - frame->slots[base] = ret; - } else if (JS_IsFunction(frame->slots[base])) { - /* Non-proxy function with non-text key: disrupt */ - JS_ThrowTypeError(ctx, "cannot use bracket notation on non-proxy function"); - JS_PopGCRef(ctx, &key_ref); - goto disrupt; - } else { - /* Record method call: get property, call with this=obj */ - if (JS_IsNull(frame->slots[base])) { - JS_ThrowTypeError(ctx, "cannot read properties of null"); - JS_PopGCRef(ctx, &key_ref); - goto disrupt; - } - JSValue method = JS_GetProperty(ctx, frame->slots[base], key_ref.val); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - if (JS_IsException(method)) { JS_PopGCRef(ctx, &key_ref); goto disrupt; } - if (!JS_IsFunction(method)) { - JS_ThrowTypeError(ctx, "not a function"); - JS_PopGCRef(ctx, &key_ref); - goto disrupt; - } - JSFunction *fn = JS_VALUE_GET_FUNCTION(method); - if (fn->kind == JS_FUNC_KIND_C) { - JSValue args[nargs > 0 ? nargs : 1]; - for (int i = 0; i < nargs; i++) - args[i] = frame->slots[base + 2 + i]; - ctx->reg_current_frame = frame_ref.val; - ctx->current_register_pc = pc > 0 ? pc - 1 : 0; - JSValue ret = js_call_c_function(ctx, method, frame->slots[base], nargs, args); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - ctx->reg_current_frame = JS_NULL; - if (JS_IsException(ret)) { JS_PopGCRef(ctx, &key_ref); goto disrupt; } - frame->slots[base] = ret; - } else if (fn->kind == JS_FUNC_KIND_REGISTER) { - JSCodeRegister *fn_code = fn->u.reg.code; - JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots); - if (!new_frame) { - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - JS_PopGCRef(ctx, &key_ref); - goto disrupt; - } - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - method = JS_GetProperty(ctx, frame->slots[base], key_ref.val); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - fn = JS_VALUE_GET_FUNCTION(method); - new_frame->function = method; - new_frame->slots[0] = frame->slots[base]; /* this */ - for (int i = 0; i < nargs && i < fn_code->arity; i++) - new_frame->slots[1 + i] = frame->slots[base + 2 + i]; - int ret_slot = base; - frame->address = JS_NewInt32(ctx, (pc << 16) | ret_slot); - new_frame->caller = JS_MKPTR(frame); - frame = new_frame; - frame_ref.val = JS_MKPTR(frame); - code = fn_code; - env = fn->u.reg.env_record; - pc = code->entry_point; - } else { - /* Bytecode or other function */ - JSValue args[nargs > 0 ? nargs : 1]; - for (int i = 0; i < nargs; i++) - args[i] = frame->slots[base + 2 + i]; - JSValue ret = JS_CallInternal(ctx, method, frame->slots[base], nargs, args, 0); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - if (JS_IsException(ret)) { JS_PopGCRef(ctx, &key_ref); goto disrupt; } - frame->slots[base] = ret; - } - } - JS_PopGCRef(ctx, &key_ref); - break; - } - case MACH_RETURN: result = frame->slots[a]; if (JS_IsNull(frame->caller)) goto done; @@ -1676,6 +1508,16 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, frame->slots[a] = res; break; } + case MACH_IS_PROXY: { + JSValue v = frame->slots[b]; + int is_proxy = 0; + if (JS_IsFunction(v)) { + JSFunction *fn = JS_VALUE_GET_FUNCTION(v); + is_proxy = (fn->length == 2); + } + frame->slots[a] = JS_NewBool(ctx, is_proxy); + break; + } case MACH_TYPEOF: { JSValue val = frame->slots[b]; const char *tname = "unknown"; @@ -1699,15 +1541,19 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, break; } case MACH_AND: { - int ba = JS_ToBool(ctx, frame->slots[b]); - int bb = JS_ToBool(ctx, frame->slots[c]); - frame->slots[a] = JS_NewBool(ctx, ba && bb); + JSValue left = frame->slots[b]; + if (!JS_ToBool(ctx, left)) + frame->slots[a] = left; + else + frame->slots[a] = frame->slots[c]; break; } case MACH_OR: { - int ba = JS_ToBool(ctx, frame->slots[b]); - int bb = JS_ToBool(ctx, frame->slots[c]); - frame->slots[a] = JS_NewBool(ctx, ba || bb); + JSValue left = frame->slots[b]; + if (JS_ToBool(ctx, left)) + frame->slots[a] = left; + else + frame->slots[a] = frame->slots[c]; break; } @@ -1735,12 +1581,9 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue obj = frame->slots[b]; JSValue key = code->cpool[c]; if (JS_IsFunction(obj)) { - JSFunction *fn_chk = JS_VALUE_GET_FUNCTION(obj); - if (fn_chk->length != 2) { - JS_ThrowTypeError(ctx, "cannot read property of non-proxy function"); - frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); - goto disrupt; - } + JS_ThrowTypeError(ctx, "cannot read property of function"); + frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); + goto disrupt; } JSValue val = JS_GetProperty(ctx, obj, key); frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val); @@ -1808,6 +1651,9 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, } else if (JS_IsArray(obj)) { JS_ThrowTypeError(ctx, "array index must be a number"); ret = -1; + } else if (JS_IsBool(key) || JS_IsNull(key) || JS_IsArray(key) || JS_IsFunction(key)) { + JS_ThrowTypeError(ctx, "object key must be text"); + ret = -1; } else { ret = JS_SetProperty(ctx, obj, key, val); } @@ -2569,13 +2415,14 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { else if (strcmp(op, "is_int") == 0) { AB2(MACH_IS_INT); } else if (strcmp(op, "is_num") == 0) { AB2(MACH_IS_NUM); } else if (strcmp(op, "is_text") == 0) { AB2(MACH_IS_TEXT); } - else if (strcmp(op, "is_bool") == 0) { AB2(MACH_IS_BOOL); } + else if (strcmp(op, "is_bool") == 0) { AB2(MACH_IS_BOOL); } else if (strcmp(op, "is_null") == 0) { AB2(MACH_IS_NULL); } else if (strcmp(op, "is_array") == 0) { AB2(MACH_IS_ARRAY); } else if (strcmp(op, "is_func") == 0) { AB2(MACH_IS_FUNC); } else if (strcmp(op, "is_record") == 0) { AB2(MACH_IS_RECORD); } else if (strcmp(op, "is_stone") == 0) { AB2(MACH_IS_STONE); } else if (strcmp(op, "length") == 0) { AB2(MACH_LENGTH); } + else if (strcmp(op, "is_proxy") == 0) { AB2(MACH_IS_PROXY); } else if (strcmp(op, "typeof") == 0) { AB2(MACH_TYPEOF); } /* Logical */ else if (strcmp(op, "not") == 0) { AB2(MACH_NOT); } @@ -2594,7 +2441,15 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { int dest = A1, obj = A2; cJSON *key_item = cJSON_GetArrayItem(it, 3); if (cJSON_IsString(key_item)) { - EM(MACH_ABC(MACH_LOAD_FIELD, dest, obj, ml_cpool_str(&s, key_item->valuestring))); + int ki = ml_cpool_str(&s, key_item->valuestring); + if (ki <= 255) { + EM(MACH_ABC(MACH_LOAD_FIELD, dest, obj, ki)); + } else { + /* cpool index > 255: load key via LOADK, then use dynamic access */ + int tmp = s.nr_slots++; + EM(MACH_ABx(MACH_LOADK, tmp, ki)); + EM(MACH_ABC(MACH_LOAD_DYNAMIC, dest, obj, tmp)); + } } else { /* key is a register — fall back to dynamic access */ int key_reg = (int)key_item->valuedouble; @@ -2605,7 +2460,15 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { int obj = A1, val = A2; cJSON *key_item = cJSON_GetArrayItem(it, 3); if (cJSON_IsString(key_item)) { - EM(MACH_ABC(MACH_STORE_FIELD, obj, ml_cpool_str(&s, key_item->valuestring), val)); + int ki = ml_cpool_str(&s, key_item->valuestring); + if (ki <= 255) { + EM(MACH_ABC(MACH_STORE_FIELD, obj, ki, val)); + } else { + /* cpool index > 255: load key via LOADK, then use dynamic access */ + int tmp = s.nr_slots++; + EM(MACH_ABx(MACH_LOADK, tmp, ki)); + EM(MACH_ABC(MACH_STORE_DYNAMIC, obj, tmp, val)); + } } else { /* key is a register — fall back to dynamic access */ int key_reg = (int)key_item->valuedouble; @@ -2624,7 +2487,21 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { } /* Delete */ else if (strcmp(op, "delete") == 0) { - ABC3(MACH_DELETEINDEX); + int dest = A1, obj = A2; + cJSON *key_item = cJSON_GetArrayItem(it, 3); + if (cJSON_IsString(key_item)) { + int ki = ml_cpool_str(&s, key_item->valuestring); + if (ki <= 255) { + EM(MACH_ABC(MACH_DELETE, dest, obj, ki)); + } else { + int tmp = s.nr_slots++; + EM(MACH_ABx(MACH_LOADK, tmp, ki)); + EM(MACH_ABC(MACH_DELETEINDEX, dest, obj, tmp)); + } + } else { + int key_reg = (int)key_item->valuedouble; + EM(MACH_ABC(MACH_DELETEINDEX, dest, obj, key_reg)); + } } /* Array/Object creation */ else if (strcmp(op, "array") == 0) { @@ -3222,11 +3099,6 @@ static void dump_register_code(JSContext *ctx, JSCodeRegister *code, int indent) break; } - /* Call */ - case MACH_CALL: - printf("r%d, %d, %d", a, b, c); - break; - /* Return / throw */ case MACH_RETURN: case MACH_THROW: @@ -3294,3 +3166,21 @@ JSValue JS_RunMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue return result; } +void JS_DumpMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue env) { + MachCode *mc = JS_DeserializeMachCode(data, size); + if (!mc) { + printf("Failed to deserialize MACH bytecode\n"); + return; + } + + JSGCRef env_ref; + JS_PushGCRef(ctx, &env_ref); + env_ref.val = env; + + JSCodeRegister *code = JS_LoadMachCode(ctx, mc, env_ref.val); + JS_FreeMachCode(mc); + + dump_register_code(ctx, code, 0); + JS_PopGCRef(ctx, &env_ref); +} + diff --git a/source/quickjs-internal.h b/source/quickjs-internal.h index f43a2273..7d6ebc59 100644 --- a/source/quickjs-internal.h +++ b/source/quickjs-internal.h @@ -469,7 +469,7 @@ typedef enum MachOpcode { MACH_JMPNULL, /* if R(A)==null: pc += sBx */ /* Function calls — Lua-style consecutive registers (legacy .mach) */ - MACH_CALL, /* Call R(A) with B args R(A+1)..R(A+B), C=0 discard, C=1 keep result in R(A) */ + MACH_CALL, /* (removed — placeholder to preserve opcode numbering) */ MACH_RETURN, /* Return R(A) */ MACH_RETNIL, /* Return null */ @@ -488,7 +488,7 @@ typedef enum MachOpcode { MACH_HASPROP, /* R(A) = R(C) in R(B) — has property check */ MACH_REGEXP, /* R(A) = regexp(K(B), K(C)) — regex literal */ - MACH_CALLMETHOD, /* Method call: R(A)=obj, B=nargs in R(A+2)..R(A+1+B), C=cpool key */ + MACH_CALLMETHOD, /* (removed — placeholder to preserve opcode numbering) */ MACH_EQ_TOL, /* R(A) = eq_tol(R(B), R(B+1), R(B+2)), C=3 */ MACH_NEQ_TOL, /* R(A) = ne_tol(R(B), R(B+1), R(B+2)), C=3 */ @@ -602,6 +602,7 @@ typedef enum MachOpcode { MACH_IS_RECORD, /* R(A) = is_object(R(B)) */ MACH_IS_STONE, /* R(A) = is_stone(R(B)) */ MACH_LENGTH, /* R(A) = length(R(B)) — array/text/blob length */ + MACH_IS_PROXY, /* R(A) = is_function(R(B)) && R(B).length == 2 */ MACH_OP_COUNT } MachOpcode; @@ -737,6 +738,7 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = { [MACH_IS_RECORD] = "is_record", [MACH_IS_STONE] = "is_stone", [MACH_LENGTH] = "length", + [MACH_IS_PROXY] = "is_proxy", }; /* Compiled register-based code (off-heap, never GC'd). diff --git a/source/quickjs.h b/source/quickjs.h index e6c0f119..ce45d413 100644 --- a/source/quickjs.h +++ b/source/quickjs.h @@ -995,6 +995,9 @@ struct JSCodeRegister *JS_LoadMachCode(JSContext *ctx, MachCode *mc, JSValue env /* Deserialize and execute pre-compiled MACH binary bytecode. */ JSValue JS_RunMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue env); +/* Dump disassembly of pre-compiled MACH binary bytecode. */ +void JS_DumpMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue env); + /* Compile mcode JSON IR to MachCode binary. */ MachCode *mach_compile_mcode(struct cJSON *mcode_json); diff --git a/source/runtime.c b/source/runtime.c index 3b64f4f1..5b7d63de 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -9788,6 +9788,57 @@ static JSValue js_mach_eval_mcode (JSContext *ctx, JSValue this_val, int argc, J return result; } +/* mach_dump_mcode(name, mcode_json, env?) - compile mcode IR and dump bytecode disassembly */ +static JSValue js_mach_dump_mcode (JSContext *ctx, JSValue this_val, int argc, JSValue *argv) { + if (argc < 2 || !JS_IsText (argv[0]) || !JS_IsText (argv[1])) + return JS_ThrowTypeError (ctx, "mach_dump_mcode requires (name, mcode_json) text arguments"); + + const char *name = JS_ToCString (ctx, argv[0]); + if (!name) return JS_EXCEPTION; + + const char *json_str = JS_ToCString (ctx, argv[1]); + if (!json_str) { + JS_FreeCString (ctx, name); + return JS_EXCEPTION; + } + + cJSON *mcode = cJSON_Parse (json_str); + JS_FreeCString (ctx, json_str); + + if (!mcode) { + JS_FreeCString (ctx, name); + return JS_ThrowSyntaxError (ctx, "mach_dump_mcode: failed to parse mcode JSON"); + } + + if (!cJSON_GetObjectItemCaseSensitive (mcode, "filename")) + cJSON_AddStringToObject (mcode, "filename", name); + + MachCode *mc = mach_compile_mcode (mcode); + cJSON_Delete (mcode); + + if (!mc) { + JS_FreeCString (ctx, name); + return JS_ThrowInternalError (ctx, "mach_dump_mcode: compilation failed"); + } + + JSValue env = (argc >= 3 && JS_IsGCObject (argv[2])) ? argv[2] : JS_NULL; + + JSGCRef env_ref; + JS_PushGCRef (ctx, &env_ref); + env_ref.val = env; + + /* Serialize to binary then dump */ + size_t bin_size; + uint8_t *bin = JS_SerializeMachCode (mc, &bin_size); + JS_FreeMachCode (mc); + JS_DumpMachBin (ctx, bin, bin_size, env_ref.val); + sys_free (bin); + + JS_PopGCRef (ctx, &env_ref); + JS_FreeCString (ctx, name); + return JS_NULL; +} + /* mach_compile_mcode_bin(name, mcode_json) - compile mcode IR to serialized binary blob */ static JSValue js_mach_compile_mcode_bin (JSContext *ctx, JSValue this_val, int argc, JSValue *argv) { if (argc < 2 || !JS_IsText (argv[0]) || !JS_IsText (argv[1])) @@ -10920,6 +10971,7 @@ static void JS_AddIntrinsicBaseObjects (JSContext *ctx) { /* Core functions - using GC-safe helper */ js_set_global_cfunc(ctx, "mach_load", js_mach_load, 2); js_set_global_cfunc(ctx, "mach_eval_mcode", js_mach_eval_mcode, 3); + js_set_global_cfunc(ctx, "mach_dump_mcode", js_mach_dump_mcode, 3); js_set_global_cfunc(ctx, "mach_compile_mcode_bin", js_mach_compile_mcode_bin, 2); js_set_global_cfunc(ctx, "stone", js_cell_stone, 1); js_set_global_cfunc(ctx, "length", js_cell_length, 1); diff --git a/streamline.cm b/streamline.cm index 4ccb876f..9d3aa39a 100644 --- a/streamline.cm +++ b/streamline.cm @@ -41,7 +41,7 @@ var streamline = function(ir) { is_int: T_INT, is_text: T_TEXT, is_num: T_NUM, is_bool: T_BOOL, is_null: T_NULL, is_array: T_ARRAY, is_func: T_FUNCTION, - is_record: T_RECORD, is_stone: T_RECORD + is_record: T_RECORD } // --- Shared helpers --- diff --git a/streamline.mach b/streamline.mach index 26875a75..200e0388 100644 Binary files a/streamline.mach and b/streamline.mach differ diff --git a/vm_suite.ce b/vm_suite.ce index b62856f9..82fd01e5 100644 --- a/vm_suite.ce +++ b/vm_suite.ce @@ -1489,16 +1489,6 @@ run("zero div zero is null", function() { if (nan != null) fail("0/0 should be null") }) -run("max safe integer", function() { - var max = 9007199254740991 - if (max + 1 - 1 != max) fail("max safe integer precision lost") -}) - -run("min safe integer", function() { - var min = -9007199254740991 - if (min - 1 + 1 != min) fail("min safe integer precision lost") -}) - run("empty string falsy", function() { if ("") fail("empty string should be falsy") })