diff --git a/docs/spec/mcode.md b/docs/spec/mcode.md index 9f5f1712..29cfb488 100644 --- a/docs/spec/mcode.md +++ b/docs/spec/mcode.md @@ -10,12 +10,11 @@ Mcode is a JSON-based intermediate representation that can be interpreted direct ## Pipeline ``` -Source → Tokenize → Parse (AST) → Mcode (JSON) → Interpret - → Compile to Mach (planned) - → Compile to native (planned) +Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Interpret + → QBE → Native ``` -Mcode is produced by the `JS_Mcode` compiler pass, which emits a cJSON tree. The mcode interpreter walks this tree directly, dispatching on instruction name strings. +Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result can be interpreted by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview. ## JSMCode Structure @@ -44,16 +43,37 @@ struct JSMCode { ## Instruction Format -Each instruction is a JSON array. The first element is the instruction name (string), followed by operands: +Each instruction is a JSON array. The first element is the instruction name (string), followed by operands (typically `[op, dest, ...args, line, col]`): ```json -["LOADK", 0, 42] -["ADD", 2, 0, 1] -["JMPFALSE", 3, "else_label"] -["CALL", 0, 2, 1] +["access", 3, 5, 1, 9] +["load_index", 10, 4, 9, 5, 11] +["store_dynamic", 4, 11, 12, 6, 3] +["frame", 15, 14, 1, 7, 7] +["setarg", 15, 0, 16, 7, 7] +["invoke", 15, 13, 7, 7] ``` -The instruction set mirrors the Mach VM opcodes — same operations, same register semantics, but with string dispatch instead of numeric opcodes. 
+### Typed Load/Store + +Memory operations come in typed variants for optimization: + +- `load_index dest, obj, idx` — array element by integer index +- `load_field dest, obj, key` — record property by string key +- `load_dynamic dest, obj, key` — unknown; dispatches at runtime +- `store_index obj, val, idx` — array element store +- `store_field obj, val, key` — record property store +- `store_dynamic obj, val, key` — unknown; dispatches at runtime + +The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold. + +### Decomposed Calls + +Function calls are split into separate instructions: + +- `frame dest, fn, argc` — allocate call frame +- `setarg frame, idx, val` — set argument +- `invoke frame, result` — execute the call ## Labels diff --git a/docs/spec/pipeline.md b/docs/spec/pipeline.md new file mode 100644 index 00000000..fe1240ba --- /dev/null +++ b/docs/spec/pipeline.md @@ -0,0 +1,118 @@ +--- +title: "Compilation Pipeline" +description: "Overview of the compilation stages and optimizations" +--- + +## Overview + +The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental). + +``` +Source → Tokenize → Parse → Fold → Mach VM (default) + → Mcode → Streamline → Mcode Interpreter + → QBE → Native +``` + +## Stages + +### Tokenize (`tokenize.cm`) + +Splits source text into tokens. Handles string interpolation by re-tokenizing template literal contents. Produces a token array with position information (line, column). + +### Parse (`parse.cm`) + +Converts tokens into an AST. 
Also performs semantic analysis: + +- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`. +- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`, `"null"`. +- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`. +- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for integer-literal subscripts, `"field"` for text-literal subscripts, omitted otherwise. +- **Tail position**: `return` and `go` statements whose expression is a call get `tail: true`. + +### Fold (`fold.cm`) + +Operates on the AST. Performs constant folding and type analysis: + +- **Constant folding**: Evaluates arithmetic on known constants at compile time (e.g., `5 + 10` becomes `15`). +- **Constant propagation**: Tracks `def` bindings whose values are known constants. +- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites. +- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where `x` is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`. +- **Purity marking**: Stamps `pure: true` on expressions with no side effects (literals, name references, arithmetic on pure operands). +- **Dead code elimination**: Removes unreachable branches when conditions are known constants. + +### Mcode (`mcode.cm`) + +Lowers the AST to a JSON-based intermediate representation with explicit operations. 
Key design principle: **every type check is an explicit instruction** so downstream optimizers can see and eliminate them. + +- **Typed load/store**: Emits `load_index` (array by integer), `load_field` (record by string), or `load_dynamic` (unknown) based on type information from fold. +- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call). +- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup. + +See [Mcode IR](mcode.md) for instruction format details. + +### Streamline (`streamline.cm`) + +Optimizes the Mcode IR. Operates per-function: + +- **Redundant instruction elimination**: Removes no-op patterns and redundant moves. +- **Dead code removal**: Eliminates instructions whose results are never used. +- **Type-based narrowing**: When type information is available, narrows `load_dynamic`/`store_dynamic` to typed variants. + +### QBE Emit (`qbe_emit.cm`) + +Lowers optimized Mcode IR to QBE intermediate language for native code compilation. Each Mcode function becomes a QBE function that calls into the cell runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.). + +String constants are interned in a data section. Integer constants are NaN-boxed inline. + +### QBE Macros (`qbe.cm`) + +Provides operation implementations as QBE IL templates. Each arithmetic, comparison, and type operation is defined as a function that emits the corresponding QBE instructions, handling type dispatch (integer, float, text paths) with proper guard checks. + +## Execution Backends + +### Mach VM (default) + +Binary 32-bit register VM. Used for production execution and bootstrapping. + +``` +./cell script.ce +``` + +### Mcode Interpreter + +JSON-based interpreter. Used for debugging the compilation pipeline. 
+ +``` +./cell --mcode script.ce +``` + +### QBE Native (experimental) + +Generates QBE IL that can be compiled to native code. + +``` +./cell --emit-qbe script.ce > output.ssa +``` + +## Files + +| File | Role | +|------|------| +| `tokenize.cm` | Lexer | +| `parse.cm` | Parser + semantic analysis | +| `fold.cm` | Constant folding + type analysis | +| `mcode.cm` | AST → Mcode IR lowering | +| `streamline.cm` | Mcode IR optimizer | +| `qbe_emit.cm` | Mcode IR → QBE IL emitter | +| `qbe.cm` | QBE IL operation templates | +| `internal/bootstrap.cm` | Pipeline orchestrator | + +## Test Files + +| File | Tests | +|------|-------| +| `parse_test.ce` | Type tags, access_kind, intrinsic resolution | +| `fold_test.ce` | Type propagation, purity, intrinsic hints | +| `mcode_test.ce` | Typed load/store, decomposed calls | +| `streamline_test.ce` | Optimization counts, IR before/after | +| `qbe_test.ce` | End-to-end QBE IL generation | diff --git a/fold.cm b/fold.cm index f1ed7cc9..adc6087d 100644 --- a/fold.cm +++ b/fold.cm @@ -158,6 +158,7 @@ var fold = function(ast) { var name = null var sv = null var item = null + var rhs_target = null while (i < length(stmts)) { stmt = stmts[i] kind = stmt.kind @@ -169,6 +170,19 @@ var fold = function(ast) { register_const(fn_nr, name, stmt.right) } } + if (name != null && stmt.right != null && stmt.right.kind == "(") { + rhs_target = stmt.right.expression + if (rhs_target != null && rhs_target.intrinsic == true) { + sv = scope_var(fn_nr, name) + if (sv != null && sv.type_tag == null) { + if (rhs_target.name == "array") sv.type_tag = "array" + else if (rhs_target.name == "record") sv.type_tag = "record" + else if (rhs_target.name == "text") sv.type_tag = "text" + else if (rhs_target.name == "number") sv.type_tag = "number" + else if (rhs_target.name == "blob") sv.type_tag = "blob" + } + } + } } else if (kind == "function") { name = stmt.name if (name != null && stmt.arity != null) { @@ -320,6 +334,8 @@ var fold = function(ast) { var ar = 
null var akey = null var tv = null + var att = null + var arg = null // Recurse into children first (bottom-up) if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" || @@ -385,6 +401,10 @@ var fold = function(ast) { return copy_loc(expr, {kind: lit.kind, value: lit.value, number: lit.number}) } } + sv = scope_var(fn_nr, expr.name) + if (sv != null && sv.type_tag != null) { + expr.type_tag = sv.type_tag + } return expr } @@ -497,7 +517,7 @@ var fold = function(ast) { return expr } - // Call: stamp arity + // Call: stamp arity and fold intrinsic type checks if (k == "(") { target = expr.expression if (target != null && target.kind == "name" && target.level == 0) { @@ -506,6 +526,30 @@ var fold = function(ast) { if (fn_arities[akey] != null) ar = fn_arities[akey][target.name] if (ar != null) expr.arity = ar } + if (target != null && target.intrinsic == true && length(expr.list) == 1) { + arg = expr.list[0] + att = null + if (arg.type_tag != null) { + att = arg.type_tag + } else if (arg.kind == "name" && arg.level == 0) { + sv = scope_var(fn_nr, arg.name) + if (sv != null) att = sv.type_tag + } + if (att != null) { + if (target.name == "is_array") return make_bool(att == "array", expr) + if (target.name == "is_text") return make_bool(att == "text", expr) + if (target.name == "is_number") return make_bool(att == "number" || att == "integer", expr) + if (target.name == "is_integer" && att != "number") return make_bool(att == "integer", expr) // a "number" tag may still hold an integral value at runtime, so leave the check unfolded + if (target.name == "is_function") return make_bool(att == "function", expr) + if (target.name == "is_logical") return make_bool(att == "logical", expr) + if (target.name == "is_null") return make_bool(att == "null", expr) + if (target.name == "is_object") return make_bool(att == "record", expr) + if (target.name == "length") { + if (att == "array") expr.hint = "array_length" + else if (att == "text") expr.hint = "text_length" + } + } + } return expr } @@ -525,6 +569,7 @@ var fold = function(ast) { if (k == "var" || k == "def") { stmt.right = 
fold_expr(stmt.right, fn_nr) + if (is_pure(stmt.right)) stmt.pure = true return stmt } if (k == "var_list") { diff --git a/fold.mach b/fold.mach index 886da291..3b4ec013 100644 Binary files a/fold.mach and b/fold.mach differ diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm index 2f0984e8..fabcea7b 100644 --- a/internal/bootstrap.cm +++ b/internal/bootstrap.cm @@ -41,11 +41,17 @@ var boot_env = {use: use_basic} var tokenize_mod = boot_load("tokenize", boot_env) var parse_mod = boot_load("parse", boot_env) var fold_mod = boot_load("fold", boot_env) +use_cache['tokenize'] = tokenize_mod +use_cache['parse'] = parse_mod +use_cache['fold'] = fold_mod // Optionally load mcode compiler module var mcode_mod = null +var streamline_mod = null +var qbe_emit_mod = null if (use_mcode) { mcode_mod = boot_load("mcode", boot_env) + use_cache['mcode'] = mcode_mod } // Warn if any .cm source is newer than its .mach bytecode @@ -55,6 +61,9 @@ function check_mach_stale() { ["parse.cm", "parse.mach"], ["fold.cm", "fold.mach"], ["mcode.cm", "mcode.mach"], + ["streamline.cm", "streamline.mach"], + ["qbe.cm", "qbe.mach"], + ["qbe_emit.cm", "qbe_emit.mach"], ["internal/bootstrap.cm", "internal/bootstrap.mach"], ["internal/engine.cm", "internal/engine.mach"] ] @@ -118,26 +127,78 @@ function analyze(src, filename) { return ast } +// Load a module from .mach bytecode, falling back to source compilation +function load_module(name, env) { + var mach_path = core_path + '/' + name + ".mach" + var data = null + var src_path = null + var src = null + var ast = null + if (fd.is_file(mach_path)) { + data = fd.slurp(mach_path) + return mach_load(data, env) + } + src_path = core_path + '/' + name + ".cm" + src = text(fd.slurp(src_path)) + ast = analyze(src, src_path) + return mach_eval_ast(name, json.encode(ast), env) +} + +// Load optimization pipeline modules (needs analyze to be defined) +var qbe_macros = null +if (use_mcode) { + streamline_mod = load_module("streamline", boot_env) + 
use_cache['streamline'] = streamline_mod + if (emit_qbe) { + qbe_macros = load_module("qbe", boot_env) + qbe_emit_mod = load_module("qbe_emit", boot_env) + use_cache['qbe'] = qbe_macros + use_cache['qbe_emit'] = qbe_emit_mod + } +} + // Run AST through either mcode or mach pipeline function run_ast(name, ast, env) { var compiled = null + var optimized = null + var qbe_il = null if (use_mcode) { compiled = mcode_mod(ast) - return mcode_run(name, json.encode(compiled), env) + optimized = streamline_mod(compiled) + if (emit_qbe) { + qbe_il = qbe_emit_mod(optimized, qbe_macros) + print(qbe_il) + return null + } + return mcode_run(name, json.encode(optimized), env) } return mach_eval_ast(name, json.encode(ast), env) } // use() with ƿit pipeline for .cm modules function use_fn(path) { - var file_path = path + '.cm' + var file_path = null + var mach_path = null + var data = null var script = null var ast = null var result = null if (use_cache[path]) return use_cache[path] - // Check CWD first, then core_path + // Try .mach bytecode first (CWD then core_path) + mach_path = path + '.mach' + if (!fd.is_file(mach_path)) + mach_path = core_path + '/' + path + '.mach' + if (fd.is_file(mach_path)) { + data = fd.slurp(mach_path) + result = mach_load(data, {use: use_fn}) + use_cache[path] = result + return result + } + + // Try .cm source (CWD then core_path) + file_path = path + '.cm' if (!fd.is_file(file_path)) file_path = core_path + '/' + path + '.cm' diff --git a/internal/bootstrap.mach b/internal/bootstrap.mach index 452430bb..0a18f9cd 100644 Binary files a/internal/bootstrap.mach and b/internal/bootstrap.mach differ diff --git a/mcode.cm b/mcode.cm index d8a30ecd..5f6df508 100644 --- a/mcode.cm +++ b/mcode.cm @@ -51,6 +51,13 @@ var mcode = function(ast) { var s_cur_col = 0 var s_filename = null + // Shared closure vars for binop helpers (avoids >4 param functions) + var _bp_dest = 0 + var _bp_left = 0 + var _bp_right = 0 + var _bp_ln = null + var _bp_rn = null + // State 
save/restore for nested function compilation var save_state = function() { return { @@ -260,15 +267,19 @@ var mcode = function(ast) { } // emit_add_decomposed: int path -> text path -> float path -> disrupt - var emit_add_decomposed = function(dest, left, right, left_node, right_node) { + // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure + var emit_add_decomposed = function() { + var dest = _bp_dest + var left = _bp_left + var right = _bp_right var t0 = 0 var t1 = 0 - var left_is_int = is_known_int(left_node) - var left_is_text = is_known_text(left_node) - var left_is_num = is_known_number(left_node) - var right_is_int = is_known_int(right_node) - var right_is_text = is_known_text(right_node) - var right_is_num = is_known_number(right_node) + var left_is_int = is_known_int(_bp_ln) + var left_is_text = is_known_text(_bp_ln) + var left_is_num = is_known_number(_bp_ln) + var right_is_int = is_known_int(_bp_rn) + var right_is_text = is_known_text(_bp_rn) + var right_is_num = is_known_number(_bp_rn) var not_int = null var not_text = null var done = null @@ -346,13 +357,17 @@ var mcode = function(ast) { } // emit_numeric_binop: int path -> float path -> disrupt - var emit_numeric_binop = function(int_op, float_op, dest, left, right, left_node, right_node) { + // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure + var emit_numeric_binop = function(int_op, float_op) { + var dest = _bp_dest + var left = _bp_left + var right = _bp_right var t0 = 0 var t1 = 0 - var left_is_int = is_known_int(left_node) - var left_is_num = is_known_number(left_node) - var right_is_int = is_known_int(right_node) - var right_is_num = is_known_number(right_node) + var left_is_int = is_known_int(_bp_ln) + var left_is_num = is_known_number(_bp_ln) + var right_is_int = is_known_int(_bp_rn) + var right_is_num = is_known_number(_bp_rn) var not_int = null var done = null var err = null @@ -404,7 +419,11 @@ var mcode = function(ast) { } // emit_eq_decomposed: identical 
-> int -> float -> text -> null -> bool -> mismatch(false) - var emit_eq_decomposed = function(dest, left, right, left_node, right_node) { + // reads _bp_dest, _bp_left, _bp_right from closure + var emit_eq_decomposed = function() { + var dest = _bp_dest + var left = _bp_left + var right = _bp_right var t0 = 0 var t1 = 0 var done = gen_label("eq_done") @@ -472,7 +491,11 @@ var mcode = function(ast) { } // emit_ne_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(true) - var emit_ne_decomposed = function(dest, left, right, left_node, right_node) { + // reads _bp_dest, _bp_left, _bp_right from closure + var emit_ne_decomposed = function() { + var dest = _bp_dest + var left = _bp_left + var right = _bp_right var t0 = 0 var t1 = 0 var done = gen_label("ne_done") @@ -549,15 +572,19 @@ var mcode = function(ast) { } // emit_relational: int -> float -> text -> disrupt - var emit_relational = function(int_op, float_op, text_op, dest, left, right, left_node, right_node) { + // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure + var emit_relational = function(int_op, float_op, text_op) { + var dest = _bp_dest + var left = _bp_left + var right = _bp_right var t0 = 0 var t1 = 0 - var left_is_int = is_known_int(left_node) - var left_is_num = is_known_number(left_node) - var left_is_text = is_known_text(left_node) - var right_is_int = is_known_int(right_node) - var right_is_num = is_known_number(right_node) - var right_is_text = is_known_text(right_node) + var left_is_int = is_known_int(_bp_ln) + var left_is_num = is_known_number(_bp_ln) + var left_is_text = is_known_text(_bp_ln) + var right_is_int = is_known_int(_bp_rn) + var right_is_num = is_known_number(_bp_rn) + var right_is_text = is_known_text(_bp_rn) var not_int = null var not_num = null var done = null @@ -654,29 +681,33 @@ var mcode = function(ast) { } // Central router: maps op string to decomposition helper - var emit_binop = function(op_str, dest, left, right, left_node, 
right_node) { + // Sets _bp_* closure vars then calls helper with reduced args + var emit_binop = function(op_str, dest, left, right) { + _bp_dest = dest + _bp_left = left + _bp_right = right if (op_str == "add") { - emit_add_decomposed(dest, left, right, left_node, right_node) + emit_add_decomposed() } else if (op_str == "subtract") { - emit_numeric_binop("sub_int", "sub_float", dest, left, right, left_node, right_node) + emit_numeric_binop("sub_int", "sub_float") } else if (op_str == "multiply") { - emit_numeric_binop("mul_int", "mul_float", dest, left, right, left_node, right_node) + emit_numeric_binop("mul_int", "mul_float") } else if (op_str == "divide") { - emit_numeric_binop("div_int", "div_float", dest, left, right, left_node, right_node) + emit_numeric_binop("div_int", "div_float") } else if (op_str == "modulo") { - emit_numeric_binop("mod_int", "mod_float", dest, left, right, left_node, right_node) + emit_numeric_binop("mod_int", "mod_float") } else if (op_str == "eq") { - emit_eq_decomposed(dest, left, right, left_node, right_node) + emit_eq_decomposed() } else if (op_str == "ne") { - emit_ne_decomposed(dest, left, right, left_node, right_node) + emit_ne_decomposed() } else if (op_str == "lt") { - emit_relational("lt_int", "lt_float", "lt_text", dest, left, right, left_node, right_node) + emit_relational("lt_int", "lt_float", "lt_text") } else if (op_str == "le") { - emit_relational("le_int", "le_float", "le_text", dest, left, right, left_node, right_node) + emit_relational("le_int", "le_float", "le_text") } else if (op_str == "gt") { - emit_relational("gt_int", "gt_float", "gt_text", dest, left, right, left_node, right_node) + emit_relational("gt_int", "gt_float", "gt_text") } else if (op_str == "ge") { - emit_relational("ge_int", "ge_float", "ge_text", dest, left, right, left_node, right_node) + emit_relational("ge_int", "ge_float", "ge_text") } else { // Passthrough for bitwise, pow, in, etc. 
emit_3(op_str, dest, left, right) @@ -685,19 +716,31 @@ var mcode = function(ast) { } var emit_get_prop = function(dest, obj, prop) { - add_instr(["load", dest, obj, prop]) + add_instr(["load_field", dest, obj, prop]) } var emit_set_prop = function(obj, prop, val) { - add_instr(["store", obj, val, prop]) + add_instr(["store_field", obj, val, prop]) } - var emit_get_elem = function(dest, obj, idx) { - emit_3("load", dest, obj, idx) + var emit_get_elem = function(dest, obj, idx, access_kind) { + if (access_kind == "index") { + emit_3("load_index", dest, obj, idx) + } else if (access_kind == "field") { + emit_3("load_field", dest, obj, idx) + } else { + emit_3("load_dynamic", dest, obj, idx) + } } - var emit_set_elem = function(obj, idx, val) { - emit_3("store", obj, val, idx) + var emit_set_elem = function(obj, idx, val, access_kind) { + if (access_kind == "index") { + emit_3("store_index", obj, val, idx) + } else if (access_kind == "field") { + emit_3("store_field", obj, val, idx) + } else { + emit_3("store_dynamic", obj, val, idx) + } } var emit_call = function(dest, func_slot, args) { @@ -718,23 +761,37 @@ var mcode = function(ast) { } var emit_call_method = function(dest, obj, prop, args) { - var instr = ["callmethod", dest, obj, prop] + var method_slot = alloc_slot() + add_instr(["load_field", method_slot, obj, prop]) + var argc = length(args) + var frame_slot = alloc_slot() + emit_3("frame", frame_slot, method_slot, argc) + emit_3("setarg", frame_slot, 0, obj) + var arg_idx = 1 var _i = 0 - while (_i < length(args)) { - push(instr, args[_i]) + while (_i < argc) { + emit_3("setarg", frame_slot, arg_idx, args[_i]) + arg_idx = arg_idx + 1 _i = _i + 1 } - add_instr(instr) + emit_2("invoke", frame_slot, dest) } var emit_call_method_dyn = function(dest, obj, key_reg, args) { - var instr = ["callmethod_dyn", dest, obj, key_reg] + var method_slot = alloc_slot() + emit_3("load_dynamic", method_slot, obj, key_reg) + var argc = length(args) + var frame_slot = alloc_slot() 
+ emit_3("frame", frame_slot, method_slot, argc) + emit_3("setarg", frame_slot, 0, obj) + var arg_idx = 1 var _i = 0 - while (_i < length(args)) { - push(instr, args[_i]) + while (_i < argc) { + emit_3("setarg", frame_slot, arg_idx, args[_i]) + arg_idx = arg_idx + 1 _i = _i + 1 } - add_instr(instr) + emit_2("invoke", frame_slot, dest) } var emit_go_call = function(func_slot, args) { @@ -920,7 +977,9 @@ var mcode = function(ast) { if (op == null) { op = "add" } - emit_binop(op, dest, left_slot, right_slot, left, right) + _bp_ln = left + _bp_rn = right + emit_binop(op, dest, left_slot, right_slot) return dest } @@ -972,7 +1031,9 @@ var mcode = function(ast) { } right_slot = gen_expr(right, -1) dest = alloc_slot() - emit_binop(op, dest, left_slot, right_slot, null, right) + _bp_ln = null + _bp_rn = right + emit_binop(op, dest, left_slot, right_slot) if (level == 0) { local = find_var(name) if (local >= 0) { @@ -995,7 +1056,9 @@ var mcode = function(ast) { emit_get_prop(old_val, obj_slot, prop) right_slot = gen_expr(right, -1) dest = alloc_slot() - emit_binop(op, dest, old_val, right_slot, null, right) + _bp_ln = null + _bp_rn = right + emit_binop(op, dest, old_val, right_slot) emit_set_prop(obj_slot, prop, dest) return dest } else if (left_kind == "[") { @@ -1004,11 +1067,13 @@ var mcode = function(ast) { obj_slot = gen_expr(obj, -1) idx_slot = gen_expr(idx_expr, -1) old_val = alloc_slot() - emit_get_elem(old_val, obj_slot, idx_slot) + emit_get_elem(old_val, obj_slot, idx_slot, left.access_kind) right_slot = gen_expr(right, -1) dest = alloc_slot() - emit_binop(op, dest, old_val, right_slot, null, right) - emit_set_elem(obj_slot, idx_slot, dest) + _bp_ln = null + _bp_rn = right + emit_binop(op, dest, old_val, right_slot) + emit_set_elem(obj_slot, idx_slot, dest, left.access_kind) return dest } return -1 @@ -1081,7 +1146,7 @@ var mcode = function(ast) { idx_expr = left.right obj_slot = gen_expr(obj, -1) idx_slot = gen_expr(idx_expr, -1) - emit_set_elem(obj_slot, 
idx_slot, val_slot) + emit_set_elem(obj_slot, idx_slot, val_slot, left.access_kind) } return val_slot } @@ -1301,7 +1366,7 @@ var mcode = function(ast) { obj_slot = gen_expr(obj, -1) idx_slot = gen_expr(idx, -1) slot = alloc_slot() - emit_get_elem(slot, obj_slot, idx_slot) + emit_get_elem(slot, obj_slot, idx_slot, expr.access_kind) return slot } @@ -1357,7 +1422,9 @@ var mcode = function(ast) { a0 = gen_expr(args_list[0], -1) a1 = gen_expr(args_list[1], -1) d = alloc_slot() - emit_binop(mop, d, a0, a1, args_list[0], args_list[1]) + _bp_ln = args_list[0] + _bp_rn = args_list[1] + emit_binop(mop, d, a0, a1) return d } @@ -1442,7 +1509,9 @@ var mcode = function(ast) { emit_access_intrinsic(old_slot, name) } new_slot = alloc_slot() - emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node) + _bp_ln = null + _bp_rn = one_node + emit_binop(arith_op, new_slot, old_slot, one_slot) if (level == 0) { local = find_var(name) if (local >= 0) { @@ -1462,7 +1531,9 @@ var mcode = function(ast) { old_slot = alloc_slot() emit_get_prop(old_slot, obj_slot, prop) new_slot = alloc_slot() - emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node) + _bp_ln = null + _bp_rn = one_node + emit_binop(arith_op, new_slot, old_slot, one_slot) emit_set_prop(obj_slot, prop, new_slot) return postfix ? old_slot : new_slot } else if (operand_kind == "[") { @@ -1471,10 +1542,12 @@ var mcode = function(ast) { obj_slot = gen_expr(obj, -1) idx_slot = gen_expr(idx_expr, -1) old_slot = alloc_slot() - emit_get_elem(old_slot, obj_slot, idx_slot) + emit_get_elem(old_slot, obj_slot, idx_slot, operand.access_kind) new_slot = alloc_slot() - emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node) - emit_set_elem(obj_slot, idx_slot, new_slot) + _bp_ln = null + _bp_rn = one_node + emit_binop(arith_op, new_slot, old_slot, one_slot) + emit_set_elem(obj_slot, idx_slot, new_slot, operand.access_kind) return postfix ? 
old_slot : new_slot } } @@ -1911,7 +1984,9 @@ var mcode = function(ast) { case_expr = case_node.expression case_val = gen_expr(case_expr, -1) cmp_slot = alloc_slot() - emit_binop("eq", cmp_slot, switch_val, case_val, null, case_expr) + _bp_ln = null + _bp_rn = case_expr + emit_binop("eq", cmp_slot, switch_val, case_val) emit_jump_cond("jump_true", cmp_slot, case_label) push(case_labels, case_label) } diff --git a/mcode.mach b/mcode.mach index e3fdf4a9..f5227053 100644 Binary files a/mcode.mach and b/mcode.mach differ diff --git a/parse.cm b/parse.cm index 77262d1f..addb6868 100644 --- a/parse.cm +++ b/parse.cm @@ -1493,6 +1493,22 @@ var parse = function(tokens, src, filename, tokenizer) { return functino_names[name] == true } + var derive_type_tag = function(expr) { + if (expr == null) return null + var k = expr.kind + if (k == "array") return "array" + if (k == "record") return "record" + if (k == "function") return "function" + if (k == "text" || k == "text literal") return "text" + if (k == "number") { + if (is_integer(expr.number)) return "integer" + return "number" + } + if (k == "true" || k == "false") return "logical" + if (k == "null") return "null" + return null + } + var _assign_kinds = { assign: true, "+=": true, "-=": true, "*=": true, "/=": true, "%=": true, "<<=": true, ">>=": true, ">>>=": true, @@ -1517,7 +1533,8 @@ var parse = function(tokens, src, filename, tokenizer) { function_nr: v.function_nr, nr_uses: v.nr_uses, closure: v.closure == 1, - level: 0 + level: 0, + type_tag: v.type_tag } slots = slots + 1 if (v.closure) close_slots = close_slots + 1 @@ -1648,13 +1665,26 @@ var parse = function(tokens, src, filename, tokenizer) { return null } + if (kind == "[") { + sem_check_expr(scope, expr.left) + sem_check_expr(scope, expr.right) + if (expr.right != null) { + if (expr.right.kind == "number" && is_integer(expr.right.number)) { + expr.access_kind = "index" + } else if (expr.right.kind == "text") { + expr.access_kind = "field" + } + } + return 
null + } + if (kind == "," || kind == "+" || kind == "-" || kind == "*" || kind == "/" || kind == "%" || kind == "==" || kind == "!=" || kind == "<" || kind == ">" || kind == "<=" || kind == ">=" || kind == "&&" || kind == "||" || kind == "&" || kind == "|" || kind == "^" || kind == "<<" || kind == ">>" || kind == ">>>" || kind == "**" || kind == "in" || - kind == "." || kind == "[") { + kind == ".") { sem_check_expr(scope, expr.left) sem_check_expr(scope, expr.right) return null @@ -1763,6 +1793,7 @@ var parse = function(tokens, src, filename, tokenizer) { if (r.level > 0) r.v.closure = 1 } else { expr.level = -1 + expr.intrinsic = true sem_add_intrinsic(name) } } @@ -1786,6 +1817,7 @@ var parse = function(tokens, src, filename, tokenizer) { var pname = null var def_val = null var sr = null + var tt = null if (kind == "var_list") { i = 0 @@ -1825,6 +1857,13 @@ var parse = function(tokens, src, filename, tokenizer) { } } sem_check_expr(scope, stmt.right) + if (name != null) { + tt = derive_type_tag(stmt.right) + if (tt != null) { + existing = sem_find_var(scope, name) + if (existing != null) existing.type_tag = tt + } + } return null } @@ -1902,6 +1941,9 @@ var parse = function(tokens, src, filename, tokenizer) { if (kind == "return" || kind == "go") { sem_check_expr(scope, stmt.expression) + if (stmt.expression != null && stmt.expression.kind == "(") { + stmt.tail = true + } return null } diff --git a/parse.mach b/parse.mach index d5dce8b2..9e259f42 100644 Binary files a/parse.mach and b/parse.mach differ diff --git a/qbe.cm b/qbe.cm index 6dbc6947..fa95e688 100644 --- a/qbe.cm +++ b/qbe.cm @@ -13,6 +13,11 @@ def js_true = 35 def js_exception = 15 def js_empty_text = 27 +// Shared closure vars for functions with >4 params +var _qop = null +var _qop2 = null +var _qflags = null + def int32_min = -2147483648 def int32_max = 2147483647 def mantissa_mask = 4503599627370495 @@ -398,18 +403,20 @@ var mod = function(p, ctx, a, b) { // 
============================================================
 // Helper: generate comparison for a given op string and int comparison QBE op
-// null_true: whether null==null returns true (eq, le, ge) or false (ne, lt, gt)
-var cmp = function(p, ctx, a, b, int_cmp_op, float_cmp_op_id, is_eq, is_ne, null_true) {
+// reads _qflags = {int_cmp_op, float_id, is_eq, is_ne, null_true} from closure; each wrapper below sets it right before calling
+var cmp = function(p, ctx, a, b) {
+  var int_cmp_op = _qflags.int_cmp_op
+  var float_cmp_op_id = _qflags.float_id
   var eq_only = 0
-  if (is_eq || is_ne) {
+  var mismatch_val = js_false
+  var null_val = js_false
+  if (_qflags.is_eq || _qflags.is_ne) {
     eq_only = 1
   }
-  var mismatch_val = js_false
-  if (is_ne) {
+  if (_qflags.is_ne) {
     mismatch_val = js_true
   }
-  var null_val = js_false
-  if (null_true) {
+  if (_qflags.null_true) {
     null_val = js_true
   }
   return `@${p}.start
@@ -485,27 +492,33 @@ var cmp = function(p, ctx, a, b, int_cmp_op, float_cmp_op_id, is_eq, is_ne, null
 // MACH_EQ=0, NEQ=1, LT=2, LE=3, GT=4, GE=5
 // null_true: eq, le, ge return true for null==null; ne, lt, gt return false
 var eq = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "ceqw", 0, true, false, true)
+  _qflags = {int_cmp_op: "ceqw", float_id: 0, is_eq: true, is_ne: false, null_true: true}
+  return cmp(p, ctx, a, b)
 }

 var ne = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "cnew", 1, false, true, false)
+  _qflags = {int_cmp_op: "cnew", float_id: 1, is_eq: false, is_ne: true, null_true: false}
+  return cmp(p, ctx, a, b)
 }

 var lt = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "csltw", 2, false, false, false)
+  _qflags = {int_cmp_op: "csltw", float_id: 2, is_eq: false, is_ne: false, null_true: false}
+  return cmp(p, ctx, a, b)
 }

 var le = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "cslew", 3, false, false, true)
+  _qflags = {int_cmp_op: "cslew", float_id: 3, is_eq: false, is_ne: false, null_true: true}
+  return cmp(p, ctx, a, b)
 }

 var gt = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "csgtw", 4, false, false, false)
+  _qflags = {int_cmp_op: "csgtw", float_id: 4, is_eq: false, is_ne: false, null_true: false}
+  return cmp(p, ctx, a, b)
 }

 var ge = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "csgew", 5, false, false, true)
+  _qflags = {int_cmp_op: "csgew", float_id: 5, is_eq: false, is_ne: false, null_true: true}
+  return cmp(p, ctx, a, b)
 }

 // ============================================================
@@ -627,7 +639,9 @@ var bnot = function(p, ctx, v) {
 // Both operands must be numeric. Int fast path, float -> convert to int32.
 // ============================================================

-var bitwise_op = function(p, ctx, a, b, qbe_op) {
+// reads _qop from closure (set by band/bor/bxor right before calling)
+var bitwise_op = function(p, ctx, a, b) {
+  var qbe_op = _qop
   return `@${p}.start
   %${p}.at =l and ${a}, 1
   %${p}.bt =l and ${b}, 1
@@ -654,19 +668,24 @@ var bitwise_op = function(p, ctx, a, b, qbe_op) {
 }

 var band = function(p, ctx, a, b) {
-  return bitwise_op(p, ctx, a, b, "and")
+  _qop = "and"
+  return bitwise_op(p, ctx, a, b)
 }

 var bor = function(p, ctx, a, b) {
-  return bitwise_op(p, ctx, a, b, "or")
+  _qop = "or"
+  return bitwise_op(p, ctx, a, b)
 }

 var bxor = function(p, ctx, a, b) {
-  return bitwise_op(p, ctx, a, b, "xor")
+  _qop = "xor"
+  return bitwise_op(p, ctx, a, b)
 }

 // Shift ops: mask shift amount to 5 bits (& 31)
-var shift_op = function(p, ctx, a, b, qbe_op) {
+// reads _qop from closure (set by shl/shr/ushr right before calling)
+var shift_op = function(p, ctx, a, b) {
+  var qbe_op = _qop
   return `@${p}.start
   %${p}.at =l and ${a}, 1
   %${p}.bt =l and ${b}, 1
@@ -694,15 +713,18 @@ var shift_op = function(p, ctx, a, b, qbe_op) {
 }

 var shl = function(p, ctx, a, b) {
-  return shift_op(p, ctx, a, b, "shl")
+  _qop = "shl"
+  return shift_op(p, ctx, a, b)
 }

 var shr = function(p, ctx, a, b) {
-  return shift_op(p, ctx, a, b, "sar")
+  _qop = "sar"
+  return shift_op(p, ctx, a, b)
 }

 var ushr = function(p, ctx, a, b) {
-  return shift_op(p, ctx, a, b, "shr")
+  _qop = "shr"
+  return shift_op(p, ctx, a, b)
 }

 // 
============================================================ @@ -898,7 +920,9 @@ var gt_int = function(p, ctx, a, b) { return cmp_int(p, a, b, "csgtw") } var ge_int = function(p, ctx, a, b) { return cmp_int(p, a, b, "csgew") } // --- Comparisons (float path) --- -var cmp_float = function(p, ctx, a, b, op_id) { +// reads _qop from closure (op_id) +var cmp_float = function(p, ctx, a, b) { + var op_id = _qop return ` %${p}.fcr =w call $qbe_float_cmp(l ${ctx}, w ${op_id}, l ${a}, l ${b}) %${p}.fcrext =l extuw %${p}.fcr %${p}.fsh =l shl %${p}.fcrext, 5 @@ -906,15 +930,18 @@ var cmp_float = function(p, ctx, a, b, op_id) { ` } -var eq_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 0) } -var ne_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 1) } -var lt_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 2) } -var le_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 3) } -var gt_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 4) } -var ge_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 5) } +var eq_float = function(p, ctx, a, b) { _qop = 0; return cmp_float(p, ctx, a, b) } +var ne_float = function(p, ctx, a, b) { _qop = 1; return cmp_float(p, ctx, a, b) } +var lt_float = function(p, ctx, a, b) { _qop = 2; return cmp_float(p, ctx, a, b) } +var le_float = function(p, ctx, a, b) { _qop = 3; return cmp_float(p, ctx, a, b) } +var gt_float = function(p, ctx, a, b) { _qop = 4; return cmp_float(p, ctx, a, b) } +var ge_float = function(p, ctx, a, b) { _qop = 5; return cmp_float(p, ctx, a, b) } // --- Comparisons (text path) --- -var cmp_text = function(p, ctx, a, b, qbe_op, eq_only) { +// reads _qop (qbe_op) and _qop2 (eq_only) from closure +var cmp_text = function(p, ctx, a, b) { + var qbe_op = _qop + var eq_only = _qop2 return ` %${p}.scmp =w call $js_string_compare_value(l ${ctx}, l ${a}, l ${b}, w ${eq_only}) %${p}.tcr =w ${qbe_op} %${p}.scmp, 0 %${p}.tcrext =l extuw %${p}.tcr @@ -923,12 
+950,12 @@ var cmp_text = function(p, ctx, a, b, qbe_op, eq_only) { ` } -var eq_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "ceqw", 1) } -var ne_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "cnew", 1) } -var lt_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csltw", 0) } -var le_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "cslew", 0) } -var gt_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csgtw", 0) } -var ge_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csgew", 0) } +var eq_text = function(p, ctx, a, b) { _qop = "ceqw"; _qop2 = 1; return cmp_text(p, ctx, a, b) } +var ne_text = function(p, ctx, a, b) { _qop = "cnew"; _qop2 = 1; return cmp_text(p, ctx, a, b) } +var lt_text = function(p, ctx, a, b) { _qop = "csltw"; _qop2 = 0; return cmp_text(p, ctx, a, b) } +var le_text = function(p, ctx, a, b) { _qop = "cslew"; _qop2 = 0; return cmp_text(p, ctx, a, b) } +var gt_text = function(p, ctx, a, b) { _qop = "csgtw"; _qop2 = 0; return cmp_text(p, ctx, a, b) } +var ge_text = function(p, ctx, a, b) { _qop = "csgew"; _qop2 = 0; return cmp_text(p, ctx, a, b) } // --- Comparisons (bool path) --- var eq_bool = function(p, a, b) { diff --git a/qbe.mach b/qbe.mach new file mode 100644 index 00000000..fd0219c9 Binary files /dev/null and b/qbe.mach differ diff --git a/qbe_emit.cm b/qbe_emit.cm new file mode 100644 index 00000000..bf94d71c --- /dev/null +++ b/qbe_emit.cm @@ -0,0 +1,667 @@ +// qbe_emit.cm — mcode IR → QBE IL compiler +// Takes mcode IR (from mcode.cm) and uses qbe.cm macros to produce +// a complete QBE IL program ready for the qbe compiler. 
+// qbe module is passed via env as 'qbe' + +var qbe_emit = function(ir, qbe) { + var out = [] + var data_out = [] + var str_table = {} + var str_id = 0 + var uid = 0 + + // ============================================================ + // Output helpers + // ============================================================ + + var emit = function(s) { + push(out, s) + } + + var fresh = function() { + uid = uid + 1 + return "u" + text(uid) + } + + var s = function(n) { + return "%s" + text(n) + } + + var sanitize = function(lbl) { + var r = replace(lbl, ".", "_") + r = replace(r, "-", "_") + r = replace(r, " ", "_") + r = replace(r, "/", "_") + r = replace(r, "<", "") + r = replace(r, ">", "") + r = replace(r, "(", "") + r = replace(r, ")", "") + return r + } + + // ============================================================ + // String interning — emit data section entries + // ============================================================ + + var intern_str = function(val) { + if (str_table[val] != null) return str_table[val] + var label = "$d_str_" + text(str_id) + str_id = str_id + 1 + var escaped = replace(val, "\\", "\\\\") + escaped = replace(escaped, "\"", "\\\"") + var line = "data " + label + ' = ' + '{ b "' + escaped + '", b 0 }' + push(data_out, line) + str_table[val] = label + return label + } + + // ============================================================ + // Extract property name from mcode operand + // ============================================================ + + var prop_name = function(a) { + if (is_text(a)) return a + if (is_object(a)) { + if (a.name != null) return a.name + if (a.value != null) return a.value + } + return null + } + + // ============================================================ + // Compile one function's instructions + // ============================================================ + + var compile_fn = function(fn, fn_idx, is_main) { + var instrs = fn.instructions + var nr_slots = fn.nr_slots + var nr_args = fn.nr_args + 
var name = is_main ? "cell_main" : "cell_fn_" + text(fn_idx) + name = sanitize(name) + var i = 0 + var instr = null + var op = null + var a1 = null + var a2 = null + var a3 = null + var a4 = null + var p = null + var pn = null + var sl = null + var fop_id = 0 + + // Function signature: (ctx, frame_ptr) → JSValue + emit(`export function l $${name}(l %ctx, l %fp) {`) + emit("@entry") + + // Load all slots from frame into SSA variables + // Each slot is a JSValue (8 bytes) at fp + slot*8 + var off = 0 + i = 0 + while (i < nr_slots) { + off = i * 8 + emit(` %p${text(i)} =l add %fp, ${text(off)}`) + emit(` ${s(i)} =l loadl %p${text(i)}`) + i = i + 1 + } + + // Walk instructions + i = 0 + while (i < length(instrs)) { + instr = instrs[i] + i = i + 1 + + // Labels are plain strings + if (is_text(instr)) { + emit("@" + sanitize(instr)) + continue + } + + op = instr[0] + a1 = instr[1] + a2 = instr[2] + a3 = instr[3] + + // --- Constants --- + + if (op == "int") { + emit(` ${s(a1)} =l copy ${text(a2 * 2)}`) + continue + } + if (op == "null") { + emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`) + continue + } + if (op == "true") { + emit(` ${s(a1)} =l copy ${text(qbe.js_true)}`) + continue + } + if (op == "false") { + emit(` ${s(a1)} =l copy ${text(qbe.js_false)}`) + continue + } + if (op == "access") { + if (is_number(a2)) { + if (is_integer(a2)) { + emit(` ${s(a1)} =l copy ${text(a2 * 2)}`) + } else { + emit(` ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2)})`) + } + } else if (is_text(a2)) { + sl = intern_str(a2) + emit(` ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`) + } else if (is_object(a2)) { + if (a2.make == "intrinsic") { + sl = intern_str(a2.name) + emit(` ${s(a1)} =l call $cell_rt_get_intrinsic(l %ctx, l ${sl})`) + } else if (a2.kind == "number") { + if (a2.number != null && is_integer(a2.number)) { + emit(` ${s(a1)} =l copy ${text(a2.number * 2)}`) + } else if (a2.number != null) { + emit(` ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d 
d_${text(a2.number)})`) + } else { + emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`) + } + } else if (a2.kind == "text") { + sl = intern_str(a2.value) + emit(` ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`) + } else if (a2.kind == "true") { + emit(` ${s(a1)} =l copy ${text(qbe.js_true)}`) + } else if (a2.kind == "false") { + emit(` ${s(a1)} =l copy ${text(qbe.js_false)}`) + } else if (a2.kind == "null") { + emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`) + } else { + emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`) + } + } else { + emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`) + } + continue + } + + // --- Movement --- + + if (op == "move") { + emit(` ${s(a1)} =l copy ${s(a2)}`) + continue + } + + // --- Arithmetic (int path) — use qbe.cm macros --- + + if (op == "add_int") { + p = fresh() + emit(qbe.add_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "sub_int") { + p = fresh() + emit(qbe.sub_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "mul_int") { + p = fresh() + emit(qbe.mul_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "div_int") { + p = fresh() + emit(qbe.div_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "mod_int") { + p = fresh() + emit(qbe.mod_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + + // --- Arithmetic (float path) --- + + if (op == "add_float") { + p = fresh() + emit(qbe.add_float(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "sub_float") { + p = fresh() + emit(qbe.sub_float(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "mul_float") { + p = fresh() + emit(qbe.mul_float(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "div_float") { + p = fresh() + emit(qbe.div_float(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy 
%${p}`) + continue + } + if (op == "mod_float") { + p = fresh() + emit(qbe.mod_float(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + + // --- String concat --- + + if (op == "concat") { + p = fresh() + emit(qbe.concat(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + + // --- Type checks — use qbe.cm macros --- + + if (op == "is_int") { + p = fresh() + emit(qbe.is_int(p, s(a2))) + emit(qbe.new_bool(p + ".r", "%" + p)) + emit(` ${s(a1)} =l copy %${p}.r`) + continue + } + if (op == "is_text") { + p = fresh() + emit(qbe.is_imm_text(p, s(a2))) + emit(qbe.new_bool(p + ".r", "%" + p)) + emit(` ${s(a1)} =l copy %${p}.r`) + continue + } + if (op == "is_num") { + p = fresh() + emit(qbe.is_number(p, s(a2))) + emit(qbe.new_bool(p + ".r", "%" + p)) + emit(` ${s(a1)} =l copy %${p}.r`) + continue + } + if (op == "is_bool") { + p = fresh() + emit(qbe.is_bool(p, s(a2))) + emit(qbe.new_bool(p + ".r", "%" + p)) + emit(` ${s(a1)} =l copy %${p}.r`) + continue + } + if (op == "is_null") { + p = fresh() + emit(qbe.is_null(p, s(a2))) + emit(qbe.new_bool(p + ".r", "%" + p)) + emit(` ${s(a1)} =l copy %${p}.r`) + continue + } + if (op == "is_identical") { + p = fresh() + emit(qbe.is_identical(p, s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + + // --- Comparisons (int path) --- + + if (op == "eq_int") { + p = fresh() + emit(qbe.eq_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "ne_int") { + p = fresh() + emit(qbe.ne_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "lt_int") { + p = fresh() + emit(qbe.lt_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "gt_int") { + p = fresh() + emit(qbe.gt_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "le_int") { + p = fresh() + emit(qbe.le_int(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue 
+      }
+      if (op == "ge_int") {
+        p = fresh()
+        emit(qbe.ge_int(p, "%ctx", s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- Comparisons (float/text/bool) ---
+
+      if (op == "eq_float") {
+        p = fresh()
+        emit(qbe.eq_float(p, "%ctx", s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_float") {
+        p = fresh()
+        emit(qbe.ne_float(p, "%ctx", s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "lt_float" || op == "gt_float" || op == "le_float" || op == "ge_float") {
+        p = fresh()
+        // Use the per-op qbe.cm macros like eq_float/ne_float above: cmp_float reads its op id from a
+        // qbe.cm closure var, not a 5th argument. Assumes qbe exports lt/le/gt/ge_float — TODO confirm.
+        if (op == "lt_float") emit(qbe.lt_float(p, "%ctx", s(a2), s(a3)))
+        else if (op == "le_float") emit(qbe.le_float(p, "%ctx", s(a2), s(a3)))
+        else if (op == "gt_float") emit(qbe.gt_float(p, "%ctx", s(a2), s(a3)))
+        else emit(qbe.ge_float(p, "%ctx", s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "eq_text") {
+        p = fresh()
+        emit(qbe.eq_text(p, "%ctx", s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_text") {
+        p = fresh()
+        emit(qbe.ne_text(p, "%ctx", s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "lt_text" || op == "gt_text" || op == "le_text" || op == "ge_text") {
+        p = fresh()
+        emit(` ${s(a1)} =l call $cell_rt_${op}(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "eq_bool") {
+        p = fresh()
+        emit(qbe.eq_bool(p, s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_bool") {
+        p = fresh()
+        emit(qbe.ne_bool(p, s(a2), s(a3)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "eq_tol" || op == "ne_tol") {
+        emit(` ${s(a1)} =l call $cell_rt_${op}(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+
+      // --- Boolean ops ---
+
+      if (op == "not") {
+        p = fresh()
+        emit(qbe.lnot(p, "%ctx", s(a2)))
+        emit(` ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "and") {
+        emit(` ${s(a1)} =l and ${s(a2)}, ${s(a3)}`)
+        continue
+      }
+      if
(op == "or") { + emit(` ${s(a1)} =l or ${s(a2)}, ${s(a3)}`) + continue + } + + // --- Bitwise ops — use qbe.cm macros --- + + if (op == "bitnot") { + p = fresh() + emit(qbe.bnot(p, "%ctx", s(a2))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "bitand") { + p = fresh() + emit(qbe.band(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "bitor") { + p = fresh() + emit(qbe.bor(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "bitxor") { + p = fresh() + emit(qbe.bxor(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "shl") { + p = fresh() + emit(qbe.shl(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "shr") { + p = fresh() + emit(qbe.shr(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + if (op == "ushr") { + p = fresh() + emit(qbe.ushr(p, "%ctx", s(a2), s(a3))) + emit(` ${s(a1)} =l copy %${p}`) + continue + } + + // --- Property access — runtime calls --- + + if (op == "load_field") { + pn = prop_name(a3) + if (pn != null) { + sl = intern_str(pn) + emit(` ${s(a1)} =l call $cell_rt_load_field(l %ctx, l ${s(a2)}, l ${sl})`) + } else { + emit(` ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`) + } + continue + } + if (op == "load_index") { + emit(` ${s(a1)} =l call $cell_rt_load_index(l %ctx, l ${s(a2)}, l ${s(a3)})`) + continue + } + if (op == "load_dynamic") { + emit(` ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`) + continue + } + if (op == "store_field") { + pn = prop_name(a3) + if (pn != null) { + sl = intern_str(pn) + emit(` call $cell_rt_store_field(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${sl})`) + } else { + emit(` call $cell_rt_store_dynamic(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`) + } + continue + } + if (op == "store_index") { + emit(` call $cell_rt_store_index(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`) + continue + } + if 
(op == "store_dynamic") { + emit(` call $cell_rt_store_dynamic(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`) + continue + } + + // --- Closure access --- + + if (op == "get") { + emit(` ${s(a1)} =l call $cell_rt_get_closure(l %ctx, l %fp, l ${text(a2)}, l ${text(a3)})`) + continue + } + if (op == "put") { + emit(` call $cell_rt_put_closure(l %ctx, l %fp, l ${s(a1)}, l ${text(a2)}, l ${text(a3)})`) + continue + } + + // --- Control flow --- + + if (op == "jump") { + emit(` jmp @${sanitize(a1)}`) + continue + } + if (op == "jump_true") { + p = fresh() + emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`) + emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`) + emit(`@${p}_f`) + continue + } + if (op == "jump_false") { + p = fresh() + emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`) + emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`) + emit(`@${p}_t`) + continue + } + if (op == "jump_null") { + p = fresh() + emit(` %${p} =w ceql ${s(a1)}, ${text(qbe.js_null)}`) + emit(` jnz %${p}, @${sanitize(a2)}, @${p}_nn`) + emit(`@${p}_nn`) + continue + } + if (op == "jump_not_null") { + p = fresh() + emit(` %${p} =w cnel ${s(a1)}, ${text(qbe.js_null)}`) + emit(` jnz %${p}, @${sanitize(a2)}, @${p}_n`) + emit(`@${p}_n`) + continue + } + if (op == "wary_true") { + p = fresh() + emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`) + emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`) + emit(`@${p}_f`) + continue + } + if (op == "wary_false") { + p = fresh() + emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`) + emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`) + emit(`@${p}_t`) + continue + } + + // --- Function calls --- + + if (op == "frame") { + emit(` ${s(a1)} =l call $cell_rt_frame(l %ctx, l ${s(a2)}, l ${text(a3)})`) + continue + } + if (op == "setarg") { + emit(` call $cell_rt_setarg(l ${s(a1)}, l ${text(a2)}, l ${s(a3)})`) + continue + } + if (op == "invoke") { + emit(` ${s(a2)} =l call $cell_rt_invoke(l %ctx, l ${s(a1)})`) + continue + } + if (op == "goframe") { + emit(` ${s(a1)} =l 
call $cell_rt_goframe(l %ctx, l ${s(a2)}, l ${text(a3)})`) + continue + } + if (op == "goinvoke") { + emit(` call $cell_rt_goinvoke(l %ctx, l ${s(a1)})`) + continue + } + + // --- Function object creation --- + + if (op == "function") { + emit(` ${s(a1)} =l call $cell_rt_make_function(l %ctx, l ${text(a2)})`) + continue + } + + // --- Array push/pop --- + + if (op == "push") { + emit(` call $cell_rt_push(l %ctx, l ${s(a1)}, l ${s(a2)})`) + continue + } + if (op == "pop") { + emit(` ${s(a1)} =l call $cell_rt_pop(l %ctx, l ${s(a2)})`) + continue + } + + // --- Misc --- + + if (op == "return") { + emit(` ret ${s(a1)}`) + continue + } + if (op == "disrupt") { + emit(` call $cell_rt_disrupt(l %ctx)`) + emit(` ret ${text(qbe.js_null)}`) + continue + } + if (op == "delete") { + emit(` ${s(a1)} =l call $cell_rt_delete(l %ctx, l ${s(a2)}, l ${s(a3)})`) + continue + } + if (op == "typeof") { + emit(` ${s(a1)} =l call $cell_rt_typeof(l %ctx, l ${s(a2)})`) + continue + } + + // --- Unknown opcode --- + emit(` # unknown: ${op}`) + } + + emit("}") + emit("") + } + + // ============================================================ + // Main: compile all functions then main + // ============================================================ + + var fi = 0 + while (fi < length(ir.functions)) { + compile_fn(ir.functions[fi], fi, false) + fi = fi + 1 + } + + compile_fn(ir.main, -1, true) + + // Assemble: data section first, then function bodies + var result = [] + var di = 0 + while (di < length(data_out)) { + push(result, data_out[di]) + di = di + 1 + } + if (length(data_out) > 0) push(result, "") + + di = 0 + while (di < length(out)) { + push(result, out[di]) + di = di + 1 + } + + return text(result, "\n") +} + +return qbe_emit diff --git a/qbe_emit.mach b/qbe_emit.mach new file mode 100644 index 00000000..a3bccfc2 Binary files /dev/null and b/qbe_emit.mach differ diff --git a/regen.cm b/regen.cm index 06d0a36d..df227202 100644 --- a/regen.cm +++ b/regen.cm @@ -12,6 +12,9 @@ var files 
= [ {src: "parse.cm", name: "parse", out: "parse.mach"}, {src: "fold.cm", name: "fold", out: "fold.mach"}, {src: "mcode.cm", name: "mcode", out: "mcode.mach"}, + {src: "streamline.cm", name: "streamline", out: "streamline.mach"}, + {src: "qbe.cm", name: "qbe", out: "qbe.mach"}, + {src: "qbe_emit.cm", name: "qbe_emit", out: "qbe_emit.mach"}, {src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"}, {src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"} ] diff --git a/source/cell.c b/source/cell.c index d8f36e23..88426eab 100644 --- a/source/cell.c +++ b/source/cell.c @@ -323,6 +323,7 @@ int cell_init(int argc, char **argv) /* Default: run script through bootstrap pipeline */ int use_mcode = 0; + int emit_qbe = 0; int arg_start = 1; const char *shop_override = NULL; const char *core_override = NULL; @@ -332,6 +333,10 @@ int cell_init(int argc, char **argv) if (strcmp(argv[arg_start], "--mcode") == 0) { use_mcode = 1; arg_start++; + } else if (strcmp(argv[arg_start], "--emit-qbe") == 0) { + use_mcode = 1; // QBE requires mcode pipeline + emit_qbe = 1; + arg_start++; } else if (strcmp(argv[arg_start], "--shop") == 0) { if (arg_start + 1 >= argc) { printf("ERROR: --shop requires a path argument\n"); @@ -416,6 +421,7 @@ int cell_init(int argc, char **argv) JS_SetPropertyStr(ctx, hidden_env, "shop_path", shop_path ? 
JS_NewString(ctx, shop_path) : JS_NULL); JS_SetPropertyStr(ctx, hidden_env, "use_mcode", JS_NewBool(ctx, use_mcode)); + JS_SetPropertyStr(ctx, hidden_env, "emit_qbe", JS_NewBool(ctx, emit_qbe)); JS_SetPropertyStr(ctx, hidden_env, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val)); JS_SetPropertyStr(ctx, hidden_env, "json", js_json_use(ctx)); JS_SetPropertyStr(ctx, hidden_env, "nota", js_nota_use(ctx)); diff --git a/streamline.cm b/streamline.cm new file mode 100644 index 00000000..2606395f --- /dev/null +++ b/streamline.cm @@ -0,0 +1,351 @@ +// streamline.cm — mcode IR optimizer +// Single forward pass: type inference + strength reduction + +var streamline = function(ir) { + // Type constants + var T_UNKNOWN = "unknown" + var T_INT = "int" + var T_FLOAT = "float" + var T_NUM = "num" + var T_TEXT = "text" + var T_BOOL = "bool" + var T_NULL = "null" + + // Integer arithmetic ops that produce integer results + var int_result_ops = { + add_int: true, sub_int: true, mul_int: true, + div_int: true, mod_int: true + } + + // Float arithmetic ops that produce float results + var float_result_ops = { + add_float: true, sub_float: true, mul_float: true, + div_float: true, mod_float: true + } + + // Comparison ops that produce bool results + var bool_result_ops = { + eq_int: true, ne_int: true, lt_int: true, gt_int: true, + le_int: true, ge_int: true, + eq_float: true, ne_float: true, lt_float: true, gt_float: true, + le_float: true, ge_float: true, + eq_text: true, ne_text: true, lt_text: true, gt_text: true, + le_text: true, ge_text: true, + eq_bool: true, ne_bool: true, + eq_tol: true, ne_tol: true, + not: true, and: true, or: true, + is_int: true, is_text: true, is_num: true, + is_bool: true, is_null: true, is_identical: true + } + + // Type check opcodes and what type they verify + var type_check_map = { + is_int: T_INT, + is_text: T_TEXT, + is_num: T_NUM, + is_bool: T_BOOL, + is_null: T_NULL + } + + // Determine the type of an access literal value + var 
access_value_type = function(val) { + if (is_number(val)) { + if (is_integer(val)) { + return T_INT + } + return T_FLOAT + } + if (is_text(val)) { + return T_TEXT + } + return T_UNKNOWN + } + + // Update slot_types for an instruction (shared tracking logic) + var track_types = function(slot_types, instr) { + var op = instr[0] + var src_type = null + + if (op == "access") { + slot_types[text(instr[1])] = access_value_type(instr[2]) + } else if (op == "int") { + slot_types[text(instr[1])] = T_INT + } else if (op == "true" || op == "false") { + slot_types[text(instr[1])] = T_BOOL + } else if (op == "null") { + slot_types[text(instr[1])] = T_NULL + } else if (op == "move") { + src_type = slot_types[text(instr[2])] + if (src_type != null) { + slot_types[text(instr[1])] = src_type + } else { + slot_types[text(instr[1])] = T_UNKNOWN + } + } else if (int_result_ops[op] == true) { + slot_types[text(instr[1])] = T_INT + } else if (float_result_ops[op] == true) { + slot_types[text(instr[1])] = T_FLOAT + } else if (op == "concat") { + slot_types[text(instr[1])] = T_TEXT + } else if (bool_result_ops[op] == true) { + slot_types[text(instr[1])] = T_BOOL + } else if (op == "load_field" || op == "load_index" || op == "load_dynamic") { + slot_types[text(instr[1])] = T_UNKNOWN + } else if (op == "invoke") { + slot_types[text(instr[2])] = T_UNKNOWN + } else if (op == "pop" || op == "get" || op == "function") { + slot_types[text(instr[1])] = T_UNKNOWN + } else if (op == "typeof") { + slot_types[text(instr[1])] = T_TEXT + } else if (op == "neg_int") { + slot_types[text(instr[1])] = T_INT + } else if (op == "neg_float") { + slot_types[text(instr[1])] = T_FLOAT + } else if (op == "bitnot" || op == "bitand" || op == "bitor" || + op == "bitxor" || op == "shl" || op == "shr" || op == "ushr") { + slot_types[text(instr[1])] = T_INT + } + return null + } + + // Check if a slot has a known type (with T_NUM subsumption) + var slot_is = function(slot_types, slot, typ) { + var known = 
slot_types[text(slot)]
+    if (known == null) {
+      return false
+    }
+    if (known == typ) {
+      return true
+    }
+    if (typ == T_NUM && (known == T_INT || known == T_FLOAT)) {
+      return true
+    }
+    return false
+  }
+
+  // Optimize a single function's instructions
+  var optimize_function = function(func) {
+    var instructions = func.instructions
+    var num_instr = 0
+    var slot_types = null
+    var nop_counter = 0
+    var i = 0
+    var instr = null
+    var op = null
+    var dest = 0
+    var src = 0
+    var checked_type = null
+    var next = null
+    var next_op = null
+    var target_label = null
+    var src_known = null
+    var jlen = 0
+    var j = 0
+    var peek = null
+
+    if (instructions == null || length(instructions) == 0) {
+      return null
+    }
+
+    num_instr = length(instructions)
+    slot_types = {}
+
+    // Peephole optimization pass: type tracking + strength reduction
+    i = 0
+    while (i < num_instr) {
+      instr = instructions[i]
+
+      // Labels are join points: clear all type info (conservative)
+      if (is_text(instr)) {
+        slot_types = {}
+        i = i + 1
+        continue
+      }
+
+      if (!is_array(instr)) {
+        i = i + 1
+        continue
+      }
+
+      op = instr[0]
+
+      // --- Peephole: type-check + jump where we know the type ---
+      if (type_check_map[op] != null && i + 1 < num_instr) {
+        dest = instr[1]
+        src = instr[2]
+        checked_type = type_check_map[op]
+        next = instructions[i + 1]
+
+        if (is_array(next)) {
+          next_op = next[0]
+
+          // Pattern: is_ t, x -> jump_false t, label
+          if (next_op == "jump_false" && next[1] == dest) {
+            target_label = next[2]
+
+            if (slot_is(slot_types, src, checked_type)) {
+              // Known match: check always true, never jumps — eliminate both
+              nop_counter = nop_counter + 1
+              instructions[i] = "_nop_" + text(nop_counter)
+              nop_counter = nop_counter + 1
+              instructions[i + 1] = "_nop_" + text(nop_counter)
+              slot_types[text(dest)] = T_BOOL
+              i = i + 2
+              continue
+            }
+
+            src_known = slot_types[text(src)]
+            if (src_known != null && src_known != T_UNKNOWN && src_known != checked_type && !(src_known == T_NUM && (checked_type == T_INT || checked_type == T_FLOAT))) {
+              // Check for T_NUM subsumption: INT and FLOAT match T_NUM
+              if (checked_type == T_NUM && (src_known == T_INT || src_known == T_FLOAT)) {
+                // Actually matches — eliminate both
+                nop_counter = nop_counter + 1
+                instructions[i] = "_nop_" + text(nop_counter)
+                nop_counter = nop_counter + 1
+                instructions[i + 1] = "_nop_" + text(nop_counter)
+                slot_types[text(dest)] = T_BOOL
+                i = i + 2
+                continue
+              }
+              // Known mismatch: always jumps — nop the check, rewrite jump
+              nop_counter = nop_counter + 1
+              instructions[i] = "_nop_" + text(nop_counter)
+              jlen = length(next)
+              instructions[i + 1] = ["jump", target_label, next[jlen - 2], next[jlen - 1]]
+              slot_types[text(dest)] = T_UNKNOWN
+              i = i + 2
+              continue
+            }
+
+            // Unknown (incl. a T_NUM slot tested for int/float): can't eliminate, but narrow type on fallthrough
+            slot_types[text(dest)] = T_BOOL
+            slot_types[text(src)] = checked_type
+            i = i + 2
+            continue
+          }
+
+          // Pattern: is_ t, x -> jump_true t, label
+          if (next_op == "jump_true" && next[1] == dest) {
+            target_label = next[2]
+
+            if (slot_is(slot_types, src, checked_type)) {
+              // Known match: always true, always jumps — nop check, rewrite to jump
+              nop_counter = nop_counter + 1
+              instructions[i] = "_nop_" + text(nop_counter)
+              jlen = length(next)
+              instructions[i + 1] = ["jump", target_label, next[jlen - 2], next[jlen - 1]]
+              slot_types[text(dest)] = T_BOOL
+              i = i + 2
+              continue
+            }
+
+            src_known = slot_types[text(src)]
+            if (src_known != null && src_known != T_UNKNOWN && src_known != checked_type && !(src_known == T_NUM && (checked_type == T_INT || checked_type == T_FLOAT))) {
+              if (checked_type == T_NUM && (src_known == T_INT || src_known == T_FLOAT)) {
+                // Actually matches T_NUM — always jumps
+                nop_counter = nop_counter + 1
+                instructions[i] = "_nop_" + text(nop_counter)
+                jlen = length(next)
+                instructions[i + 1] = ["jump", target_label, next[jlen - 2], next[jlen - 1]]
+                slot_types[text(dest)] = T_BOOL
+                i = i + 2
+                continue
+              }
+              // Known mismatch: never jumps — eliminate both
+              nop_counter = nop_counter + 1
+              instructions[i] = "_nop_" + text(nop_counter)
+              nop_counter = nop_counter + 1
+              instructions[i + 1] =
"_nop_" + text(nop_counter) + slot_types[text(dest)] = T_BOOL + i = i + 2 + continue + } + + // Unknown: can't optimize + slot_types[text(dest)] = T_BOOL + i = i + 2 + continue + } + } + + // Standalone type check (no jump following): just track the result + slot_types[text(dest)] = T_BOOL + i = i + 1 + continue + } + + // --- Strength reduction: load_dynamic / store_dynamic --- + if (op == "load_dynamic") { + if (slot_is(slot_types, instr[3], T_TEXT)) { + instr[0] = "load_field" + } else if (slot_is(slot_types, instr[3], T_INT)) { + instr[0] = "load_index" + } + slot_types[text(instr[1])] = T_UNKNOWN + i = i + 1 + continue + } + if (op == "store_dynamic") { + if (slot_is(slot_types, instr[3], T_TEXT)) { + instr[0] = "store_field" + } else if (slot_is(slot_types, instr[3], T_INT)) { + instr[0] = "store_index" + } + i = i + 1 + continue + } + + // --- Standard type tracking --- + track_types(slot_types, instr) + + i = i + 1 + } + + // Second pass: remove dead jumps (jump to the immediately next label) + i = 0 + while (i < num_instr) { + instr = instructions[i] + if (is_array(instr) && instr[0] == "jump") { + target_label = instr[1] + // Check if the very next non-nop item is that label + j = i + 1 + while (j < num_instr) { + peek = instructions[j] + if (is_text(peek)) { + if (peek == target_label) { + nop_counter = nop_counter + 1 + instructions[i] = "_nop_" + text(nop_counter) + } + break + } + if (is_array(peek)) { + break + } + j = j + 1 + } + } + i = i + 1 + } + + return null + } + + // Process main function + if (ir.main != null) { + optimize_function(ir.main) + } + + // Process all sub-functions + var fi = 0 + if (ir.functions != null) { + fi = 0 + while (fi < length(ir.functions)) { + optimize_function(ir.functions[fi]) + fi = fi + 1 + } + } + + return ir +} + +return streamline diff --git a/streamline.mach b/streamline.mach new file mode 100644 index 00000000..3ccf6a76 Binary files /dev/null and b/streamline.mach differ