Merge branch 'mach' into mcode2

2026-02-10 19:04:22 -06:00
parent b65db63447 0d47002167
commit ed4a5474d5
25 changed files with 2187 additions and 116 deletions
--- a/compile.ce
+++ b/compile.ce
@@ -0,0 +1,100 @@
+// compile.ce — compile a .cm module to native .dylib via QBE
+//
+// Usage:
+//   cell --core . compile.ce <file.cm>
+//
+// Produces <file>.dylib in the current directory.
+
+var fd = use('fd')
+var os = use('os')
+
+if (length(args) < 1) {
+  print('usage: cell --core . compile.ce <file.cm>')
+  return
+}
+
+var file = args[0]
+var base = file
+if (ends_with(base, '.cm')) {
+  base = text(base, 0, length(base) - 3)
+}
+
+var safe = replace(replace(base, '/', '_'), '-', '_')
+var symbol = 'js_' + safe + '_use'
+var tmp = '/tmp/qbe_' + safe
+var ssa_path = tmp + '.ssa'
+var s_path = tmp + '.s'
+var o_path = tmp + '.o'
+var rt_o_path = '/tmp/qbe_rt.o'
+var dylib_path = base + '.dylib'
+var cwd = fd.getcwd()
+var rc = 0
+
+// Step 1: emit QBE IL
+print('emit qbe...')
+rc = os.system('cd ' + cwd + ' && ./cell --core . --emit-qbe ' + file + ' > ' + ssa_path)
+if (rc != 0) {
+  print('failed to emit qbe il')
+  return
+}
+
+// Step 2: post-process — insert dead labels after ret/jmp, append wrapper
+// Use awk via shell to avoid blob/slurpwrite issues with long strings
+print('post-process...')
+var awk_cmd = `awk '
+  /^[[:space:]]*ret / || /^[[:space:]]*jmp / { need_label=1; print; next }
+  need_label && /^[[:space:]]*[^@}]/ && NF > 0 {
+    print "@_dead_" dead_id; dead_id++; need_label=0
+  }
+  /^@/ || /^}/ || NF==0 { need_label=0 }
+  { print }
+' ` + ssa_path + ` > ` + tmp + `_fixed.ssa`
+rc = os.system(awk_cmd)
+if (rc != 0) {
+  print('post-process failed')
+  return
+}
+
+// Append wrapper function
+var wrapper_cmd = `printf '\nexport function l $` + symbol + `(l %%ctx) {\n@entry\n  %%frame =l alloc8 4096\n  %%result =l call $cell_main(l %%ctx, l %%frame)\n  ret %%result\n}\n' >> ` + tmp + `_fixed.ssa`
+rc = os.system(wrapper_cmd)
+if (rc != 0) {
+  print('wrapper append failed')
+  return
+}
+
+// Step 3: compile QBE IL to assembly
+print('qbe compile...')
+rc = os.system('~/.local/bin/qbe -o ' + s_path + ' ' + tmp + '_fixed.ssa')
+if (rc != 0) {
+  print('qbe compilation failed')
+  return
+}
+
+// Step 4: assemble
+print('assemble...')
+rc = os.system('cc -c ' + s_path + ' -o ' + o_path)
+if (rc != 0) {
+  print('assembly failed')
+  return
+}
+
+// Step 5: compile runtime stubs (cached — skip if already built)
+if (!fd.is_file(rt_o_path)) {
+  print('compile runtime stubs...')
+  rc = os.system('cc -c ' + cwd + '/qbe_rt.c -o ' + rt_o_path + ' -fPIC')
+  if (rc != 0) {
+    print('runtime stubs compilation failed')
+    return
+  }
+}
+
+// Step 6: link dylib
+print('link...')
+rc = os.system('cc -shared -fPIC -undefined dynamic_lookup ' + o_path + ' ' + rt_o_path + ' -o ' + cwd + '/' + dylib_path)
+if (rc != 0) {
+  print('linking failed')
+  return
+}
+
+print('built: ' + dylib_path)
--- a/docs/spec/c-runtime.md
+++ b/docs/spec/c-runtime.md
@@ -0,0 +1,296 @@
+---
+title: "C Runtime for Native Code"
+description: "Minimum C runtime surface for QBE-generated native code"
+---
+
+## Overview
+
+QBE-generated native code calls into a C runtime for anything that touches the heap, dispatches dynamically, or requires GC awareness. The design principle: **native code handles control flow and integer math directly; everything else is a runtime call.**
+
+This document defines the runtime boundary — what must be in C, what QBE handles inline, and how to organize the C code to serve both the mcode interpreter and native code cleanly.
+
+## The Boundary
+
+### What native code does inline (no C calls)
+
+These operations compile to straight QBE instructions with no runtime involvement:
+
+- **Integer arithmetic**: `add`, `sub`, `mul` on NaN-boxed ints (shift right 1, operate, shift left 1)
+- **Integer comparisons**: extract int with shift, compare, produce tagged bool
+- **Control flow**: jumps, branches, labels, function entry/exit
+- **Slot access**: load/store to frame slots via `%fp` + offset
+- **NaN-box tagging**: integer tagging (`n << 1`), bool constants (`0x03`/`0x23`), null (`0x07`)
+- **Type tests**: `JS_IsInt` (LSB check), `JS_IsNumber`, `JS_IsText`, `JS_IsNull` — these are bit tests on the value, no heap access needed
+
+### What requires a C call
+
+Anything that:
+1. **Allocates** (arrays, records, strings, frames, function objects)
+2. **Touches the heap** (property get/set, array indexing, closure access)
+3. **Dispatches on type at runtime** (dynamic load/store, polymorphic arithmetic)
+4. **Calls user functions** (frame setup, argument passing, invocation)
+5. **Does string operations** (concatenation, comparison, conversion)
+
+## Runtime Functions
+
+### Tier 1: Essential (must exist for any program to run)
+
+These are called by virtually every QBE program.
+
+#### Intrinsic Lookup
+
+```c
+// Look up a built-in function by name. Called once per intrinsic per callsite.
+JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name);
+```
+
+Maps a name to a C function pointer wrapped in a JSValue. This is the primary entry point for all built-in functions (`print`, `text`, `length`, `is_array`, etc.). Native code never calls intrinsics directly — it always goes through `get_intrinsic` → `frame` → `setarg` → `invoke`.
+
+#### Function Calls
+
+```c
+// Allocate a call frame with space for nr_args arguments.
+JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int nr_args);
+
+// Set argument idx in the frame.
+void cell_rt_setarg(JSValue frame, int idx, JSValue val);
+
+// Execute the function. Returns the result.
+JSValue cell_rt_invoke(JSContext *ctx, JSValue frame);
+```
+
+This is the universal calling convention. Every function call — user functions, intrinsics, methods — goes through frame/setarg/invoke. The frame allocates a `JSFrameRegister` on the GC heap, setarg fills slots, invoke dispatches.
+
+**Tail call variants:**
+
+```c
+JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int nr_args);
+void    cell_rt_goinvoke(JSContext *ctx, JSValue frame);
+```
+
+Same as frame/invoke but reuse the caller's stack position.
+
+### Tier 2: Property Access (needed by any program using records or arrays)
+
+```c
+// Record field by constant name.
+JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name);
+void    cell_rt_store_field(JSContext *ctx, JSValue obj, JSValue val, const char *name);
+
+// Array element by integer index.
+JSValue cell_rt_load_index(JSContext *ctx, JSValue obj, JSValue idx);
+void    cell_rt_store_index(JSContext *ctx, JSValue obj, JSValue idx, JSValue val);
+
+// Dynamic — type of key unknown at compile time.
+JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key);
+void    cell_rt_store_dynamic(JSContext *ctx, JSValue obj, JSValue key, JSValue val);
+```
+
+The typed variants (`load_field`/`load_index`) skip the key-type dispatch that `load_dynamic` must do. When parse and fold provide type information, the mcode compiler emits the typed variant directly, and the streamline optimizer can narrow any remaining dynamic accesses to typed ones.
+
+**Implementation**: These are thin wrappers around existing `JS_GetPropertyStr`/`JS_GetPropertyNumber`/`JS_GetProperty` and their `Set` counterparts.
+
+### Tier 3: Closures (needed by programs with nested functions)
+
+```c
+// Walk depth levels up the frame chain, read slot.
+JSValue cell_rt_get_closure(JSContext *ctx, JSValue fp, int depth, int slot);
+
+// Walk depth levels up, write slot.
+void    cell_rt_put_closure(JSContext *ctx, JSValue fp, JSValue val, int depth, int slot);
+```
+
+Closure variables live in outer frames. `depth` is how many `caller` links to follow; `slot` is the register index in that frame.
+
+### Tier 4: Object Construction (needed by programs creating arrays/records/functions)
+
+```c
+// Create a function object from a compiled function index.
+// The native code loader must maintain a function table.
+JSValue cell_rt_make_function(JSContext *ctx, int fn_id);
+```
+
+Array and record literals are currently compiled as intrinsic calls (`array(...)`, direct `{...}` construction) which go through the frame/invoke path. A future optimization could add:
+
+```c
+// Fast paths (optional, not yet needed)
+JSValue cell_rt_new_array(JSContext *ctx, int len);
+JSValue cell_rt_new_record(JSContext *ctx);
+```
+
+### Tier 5: Collection Operations
+
+```c
+// a[] = val (push) and var v = a[] (pop)
+void    cell_rt_push(JSContext *ctx, JSValue arr, JSValue val);
+JSValue cell_rt_pop(JSContext *ctx, JSValue arr);
+```
+
+### Tier 6: Error Handling
+
+```c
+// Trigger disruption. Jumps to the disrupt handler or unwinds.
+void cell_rt_disrupt(JSContext *ctx);
+```
+
+### Tier 7: Miscellaneous
+
+```c
+JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key);
+JSValue cell_rt_typeof(JSContext *ctx, JSValue val);
+```
+
+### Tier 8: String and Float Helpers (called from QBE inline code, not from qbe_emit)
+
+These are called from the QBE IL that `qbe.cm` generates inline for arithmetic and comparison operations. Most are not `cell_rt_` prefixed — they're lower-level; the exceptions are the tolerant-equality and text-ordering wrappers at the end of the list:
+
+```c
+// Float arithmetic (when operands aren't both ints)
+JSValue qbe_float_add(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_float_sub(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_float_mul(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_float_div(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_float_mod(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_float_pow(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_float_neg(JSContext *ctx, JSValue v);
+JSValue qbe_float_inc(JSContext *ctx, JSValue v);
+JSValue qbe_float_dec(JSContext *ctx, JSValue v);
+
+// Float comparison (returns C int 0/1 for QBE branching)
+int qbe_float_cmp(JSContext *ctx, int op, JSValue a, JSValue b);
+
+// Bitwise ops on non-int values (convert to int32 first)
+JSValue qbe_bnot(JSContext *ctx, JSValue v);
+JSValue qbe_bitwise_and(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_bitwise_or(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_bitwise_xor(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_shift_shl(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_shift_sar(JSContext *ctx, JSValue a, JSValue b);
+JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b);
+
+// String operations
+JSValue JS_ConcatString(JSContext *ctx, JSValue a, JSValue b);
+int     js_string_compare_value(JSContext *ctx, JSValue a, JSValue b, int eq_only);
+JSValue JS_NewString(JSContext *ctx, const char *str);
+JSValue __JS_NewFloat64(JSContext *ctx, double d);
+int     JS_ToBool(JSContext *ctx, JSValue v);
+
+// String/number type tests (inline-able but currently calls)
+int     JS_IsText(JSValue v);
+int     JS_IsNumber(JSValue v);
+
+// Tolerant equality (== on mixed types)
+JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b);
+JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b);
+
+// Text ordering comparisons
+JSValue cell_rt_lt_text(JSContext *ctx, JSValue a, JSValue b);
+JSValue cell_rt_le_text(JSContext *ctx, JSValue a, JSValue b);
+JSValue cell_rt_gt_text(JSContext *ctx, JSValue a, JSValue b);
+JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b);
+```
+
+## What Exists vs What Needs Writing
+
+### Already exists (in qbe_helpers.c)
+
+All `qbe_float_*`, `qbe_bnot`, `qbe_bitwise_*`, `qbe_shift_*`, `qbe_to_bool` — these are implemented and working.
+
+### Already exists (in runtime.c / quickjs.c) but not yet wrapped
+
+The underlying operations exist but aren't exposed with the `cell_rt_` names:
+
+| Runtime function | Underlying implementation |
+|---|---|
+| `cell_rt_load_field` | `JS_GetPropertyStr(ctx, obj, name)` |
+| `cell_rt_load_index` | `JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx))` |
+| `cell_rt_load_dynamic` | `JS_GetProperty(ctx, obj, key)` |
+| `cell_rt_store_field` | `JS_SetPropertyStr(ctx, obj, name, val)` |
+| `cell_rt_store_index` | `JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx), val)` |
+| `cell_rt_store_dynamic` | `JS_SetProperty(ctx, obj, key, val)` |
+| `cell_rt_delete` | `JS_DeleteProperty(ctx, obj, key)` |
+| `cell_rt_push` | `JS_ArrayPush(ctx, &arr, val)` |
+| `cell_rt_pop` | `JS_ArrayPop(ctx, arr)` |
+| `cell_rt_typeof` | type tag switch → `JS_NewString` |
+| `cell_rt_disrupt` | `JS_Throw(ctx, ...)` |
+| `cell_rt_eq_tol` / `cell_rt_ne_tol` | comparison logic in mcode.c `eq_tol`/`ne_tol` handler |
+| `cell_rt_lt_text` etc. | `js_string_compare_value` + wrap result |
+
+### Needs new code
+
+| Runtime function | What's needed |
+|---|---|
+| `cell_rt_get_intrinsic` | Look up intrinsic by name string, return JSValue function. Currently scattered across `js_cell_intrinsic_get` and the mcode handler. Needs a clean single entry point. |
+| `cell_rt_frame` | Allocate `JSFrameRegister`, set function slot, set argc. Exists in mcode.c `frame` handler but not as a callable function. |
+| `cell_rt_setarg` | Write to frame slot. Trivial: `frame->slots[idx + 1] = val` (slot 0 is `this`). |
+| `cell_rt_invoke` | Call the function in the frame. Needs to dispatch: native C function vs mach bytecode vs mcode. This is the critical piece — it must handle all function types. |
+| `cell_rt_goframe` / `cell_rt_goinvoke` | Tail call variants. Similar to frame/invoke but reuse caller frame. |
+| `cell_rt_make_function` | Create function object from index. Needs a function table (populated by the native loader). |
+| `cell_rt_get_closure` / `cell_rt_put_closure` | Walk frame chain. Exists inline in mcode.c `get`/`put` handlers. |
+
+## Recommended C File Organization
+
+```
+source/
+  cell_runtime.c    — NEW: all cell_rt_* functions (the native code API)
+  qbe_helpers.c     — existing: float/bitwise/shift helpers for inline QBE
+  runtime.c         — existing: JS_GetProperty, JS_SetProperty, etc.
+  quickjs.c         — existing: core VM, GC, value representation
+  mcode.c           — existing: mcode interpreter (can delegate to cell_runtime.c)
+```
+
+**`cell_runtime.c`** is the single file that defines the native code contract. It should:
+
+1. Include `quickjs-internal.h` for access to value representation and heap types
+2. Export all `cell_rt_*` functions with C linkage (no `static`)
+3. Keep each function thin — delegate to existing `JS_*` functions where possible
+4. Handle GC safety: after any allocation (frame, string, array), callers' frames may have moved
+
+### Implementation Priority
+
+**Phase 1** — Get "hello world" running natively:
+- `cell_rt_get_intrinsic` (to find `print` and `text`)
+- `cell_rt_frame`, `cell_rt_setarg`, `cell_rt_invoke` (to call them)
+- A loader that takes QBE output → assembles → links → calls `cell_main`
+
+**Phase 2** — Variables and arithmetic:
+- All property access (`load_field`, `load_index`, `store_*`, `load_dynamic`)
+- `cell_rt_make_function`, `cell_rt_get_closure`, `cell_rt_put_closure`
+
+**Phase 3** — Full language:
+- `cell_rt_push`, `cell_rt_pop`, `cell_rt_delete`, `cell_rt_typeof`
+- `cell_rt_disrupt`
+- `cell_rt_goframe`, `cell_rt_goinvoke`
+- Text comparison wrappers (`cell_rt_lt_text`, etc.)
+- Tolerant equality (`cell_rt_eq_tol`, `cell_rt_ne_tol`)
+
+## Calling Convention
+
+All `cell_rt_*` functions follow the same pattern:
+
+- First argument is `JSContext *ctx` (the one exception is `cell_rt_setarg`, which takes only the frame, index, and value)
+- Values are passed/returned as `JSValue` (64-bit, by value)
+- Frame pointers are `JSValue` (tagged pointer to `JSFrameRegister`)
+- String names are `const char *` (pointer to data section label)
+- Integer constants (slot indices, arg counts) are `int` / `long`
+
+Native code maintains `%ctx` (JSContext) and `%fp` (current frame pointer) as persistent values across the function body. All slot reads/writes go through `%fp` + offset.
+
+## What Should NOT Be in the C Runtime
+
+These are handled entirely by QBE-generated code:
+
+- **Integer arithmetic and comparisons** — bit operations on NaN-boxed values
+- **Control flow** — branches, loops, labels, jumps
+- **Boolean logic** — `and`/`or`/`not` on tagged values
+- **Constant loading** — integer constants are immediate, strings are data labels
+- **Type guard branches** — the `is_int`/`is_text`/`is_null` checks are inline bit tests; the branch to the float or text path is just a QBE `jnz`
+
+The `qbe.cm` macros already handle all of this. The arithmetic path looks like:
+
+```
+check both ints? → yes → inline int add → done
+                 → no  → call qbe_float_add (or JS_ConcatString for text)
+```
+
+The C runtime is only called on the slow paths (float, text, dynamic dispatch). The fast path (integer arithmetic, comparisons, branching) is fully native.
--- a/docs/spec/mcode.md
+++ b/docs/spec/mcode.md
@@ -10,12 +10,11 @@ Mcode is a JSON-based intermediate representation that can be interpreted direct
 ## Pipeline

 ```
-Source → Tokenize → Parse (AST) → Mcode (JSON) → Interpret
-                                                → Compile to Mach (planned)
-                                                → Compile to native (planned)
+Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Interpret
+                                                                    → QBE → Native
 ```

-Mcode is produced by the `JS_Mcode` compiler pass, which emits a cJSON tree. The mcode interpreter walks this tree directly, dispatching on instruction name strings.
+Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result can be interpreted by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview.

 ## JSMCode Structure

@@ -44,16 +43,37 @@ struct JSMCode {

 ## Instruction Format

-Each instruction is a JSON array. The first element is the instruction name (string), followed by operands:
+Each instruction is a JSON array. The first element is the instruction name (string), followed by operands (typically `[op, dest, ...args, line, col]`):

 ```json
-["LOADK", 0, 42]
-["ADD", 2, 0, 1]
-["JMPFALSE", 3, "else_label"]
-["CALL", 0, 2, 1]
+["access", 3, 5, 1, 9]
+["load_index", 10, 4, 9, 5, 11]
+["store_dynamic", 4, 11, 12, 6, 3]
+["frame", 15, 14, 1, 7, 7]
+["setarg", 15, 0, 16, 7, 7]
+["invoke", 15, 13, 7, 7]
 ```

-The instruction set mirrors the Mach VM opcodes — same operations, same register semantics, but with string dispatch instead of numeric opcodes.
+### Typed Load/Store
+
+Memory operations come in typed variants for optimization:
+
+- `load_index dest, obj, idx` — array element by integer index
+- `load_field dest, obj, key` — record property by string key
+- `load_dynamic dest, obj, key` — unknown; dispatches at runtime
+- `store_index obj, val, idx` — array element store
+- `store_field obj, val, key` — record property store
+- `store_dynamic obj, val, key` — unknown; dispatches at runtime
+
+The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold.
+
+### Decomposed Calls
+
+Function calls are split into separate instructions:
+
+- `frame dest, fn, argc` — allocate call frame
+- `setarg frame, idx, val` — set argument
+- `invoke frame, result` — execute the call

 ## Labels

--- a/docs/spec/pipeline.md
+++ b/docs/spec/pipeline.md
@@ -0,0 +1,118 @@
+---
+title: "Compilation Pipeline"
+description: "Overview of the compilation stages and optimizations"
+---
+
+## Overview
+
+The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental).
+
+```
+Source → Tokenize → Parse → Fold → Mach VM (default)
+                                  → Mcode → Streamline → Mcode Interpreter
+                                                        → QBE → Native
+```
+
+## Stages
+
+### Tokenize (`tokenize.cm`)
+
+Splits source text into tokens. Handles string interpolation by re-tokenizing template literal contents. Produces a token array with position information (line, column).
+
+### Parse (`parse.cm`)
+
+Converts tokens into an AST. Also performs semantic analysis:
+
+- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`.
+- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`, `"null"`.
+- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`.
+- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for numeric subscripts, `"field"` for string subscripts, omitted otherwise.
+- **Tail position**: Return statements where the expression is a call get `tail: true`.
+
+### Fold (`fold.cm`)
+
+Operates on the AST. Performs constant folding and type analysis:
+
+- **Constant folding**: Evaluates arithmetic on known constants at compile time (e.g., `5 + 10` becomes `15`).
+- **Constant propagation**: Tracks `def` bindings whose values are known constants.
+- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites.
+- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where x is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`.
+- **Purity marking**: Stamps `pure: true` on expressions with no side effects (literals, name references, arithmetic on pure operands).
+- **Dead code elimination**: Removes unreachable branches when conditions are known constants.
+
+### Mcode (`mcode.cm`)
+
+Lowers the AST to a JSON-based intermediate representation with explicit operations. Key design principle: **every type check is an explicit instruction** so downstream optimizers can see and eliminate them.
+
+- **Typed load/store**: Emits `load_index` (array by integer), `load_field` (record by string), or `load_dynamic` (unknown) based on type information from fold.
+- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call).
+- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup.
+
+See [Mcode IR](mcode.md) for instruction format details.
+
+### Streamline (`streamline.cm`)
+
+Optimizes the Mcode IR. Operates per-function:
+
+- **Redundant instruction elimination**: Removes no-op patterns and redundant moves.
+- **Dead code removal**: Eliminates instructions whose results are never used.
+- **Type-based narrowing**: When type information is available, narrows `load_dynamic`/`store_dynamic` to typed variants.
+
+### QBE Emit (`qbe_emit.cm`)
+
+Lowers optimized Mcode IR to QBE intermediate language for native code compilation. Each Mcode function becomes a QBE function that calls into the cell runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.).
+
+String constants are interned in a data section. Integer constants are NaN-boxed inline.
+
+### QBE Macros (`qbe.cm`)
+
+Provides operation implementations as QBE IL templates. Each arithmetic, comparison, and type operation is defined as a function that emits the corresponding QBE instructions, handling type dispatch (integer, float, text paths) with proper guard checks.
+
+## Execution Backends
+
+### Mach VM (default)
+
+Binary 32-bit register VM. Used for production execution and bootstrapping.
+
+```
+./cell script.ce
+```
+
+### Mcode Interpreter
+
+JSON-based interpreter. Used for debugging the compilation pipeline.
+
+```
+./cell --mcode script.ce
+```
+
+### QBE Native (experimental)
+
+Generates QBE IL that can be compiled to native code.
+
+```
+./cell --emit-qbe script.ce > output.ssa
+```
+
+## Files
+
+| File | Role |
+|------|------|
+| `tokenize.cm` | Lexer |
+| `parse.cm` | Parser + semantic analysis |
+| `fold.cm` | Constant folding + type analysis |
+| `mcode.cm` | AST → Mcode IR lowering |
+| `streamline.cm` | Mcode IR optimizer |
+| `qbe_emit.cm` | Mcode IR → QBE IL emitter |
+| `qbe.cm` | QBE IL operation templates |
+| `internal/bootstrap.cm` | Pipeline orchestrator |
+
+## Test Files
+
+| File | Tests |
+|------|-------|
+| `parse_test.ce` | Type tags, access_kind, intrinsic resolution |
+| `fold_test.ce` | Type propagation, purity, intrinsic hints |
+| `mcode_test.ce` | Typed load/store, decomposed calls |
+| `streamline_test.ce` | Optimization counts, IR before/after |
+| `qbe_test.ce` | End-to-end QBE IL generation |
--- a/fold.cm
+++ b/fold.cm
@@ -158,6 +158,7 @@ var fold = function(ast) {
    var name = null
    var sv = null
    var item = null
+    var rhs_target = null
    while (i < length(stmts)) {
      stmt = stmts[i]
      kind = stmt.kind
@@ -169,6 +170,19 @@ var fold = function(ast) {
            register_const(fn_nr, name, stmt.right)
          }
        }
+        if (name != null && stmt.right != null && stmt.right.kind == "(") {
+          rhs_target = stmt.right.expression
+          if (rhs_target != null && rhs_target.intrinsic == true) {
+            sv = scope_var(fn_nr, name)
+            if (sv != null && sv.type_tag == null) {
+              if (rhs_target.name == "array") sv.type_tag = "array"
+              else if (rhs_target.name == "record") sv.type_tag = "record"
+              else if (rhs_target.name == "text") sv.type_tag = "text"
+              else if (rhs_target.name == "number") sv.type_tag = "number"
+              else if (rhs_target.name == "blob") sv.type_tag = "blob"
+            }
+          }
+        }
      } else if (kind == "function") {
        name = stmt.name
        if (name != null && stmt.arity != null) {
@@ -320,6 +334,8 @@ var fold = function(ast) {
    var ar = null
    var akey = null
    var tv = null
+    var att = null
+    var arg = null

    // Recurse into children first (bottom-up)
    if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" ||
@@ -385,6 +401,10 @@ var fold = function(ast) {
          return copy_loc(expr, {kind: lit.kind, value: lit.value, number: lit.number})
        }
      }
+      sv = scope_var(fn_nr, expr.name)
+      if (sv != null && sv.type_tag != null) {
+        expr.type_tag = sv.type_tag
+      }
      return expr
    }

@@ -497,7 +517,7 @@ var fold = function(ast) {
      return expr
    }

-    // Call: stamp arity
+    // Call: stamp arity and fold intrinsic type checks
    if (k == "(") {
      target = expr.expression
      if (target != null && target.kind == "name" && target.level == 0) {
@@ -506,6 +526,30 @@ var fold = function(ast) {
        if (fn_arities[akey] != null) ar = fn_arities[akey][target.name]
        if (ar != null) expr.arity = ar
      }
+      if (target != null && target.intrinsic == true && length(expr.list) == 1) {
+        arg = expr.list[0]
+        att = null
+        if (arg.type_tag != null) {
+          att = arg.type_tag
+        } else if (arg.kind == "name" && arg.level == 0) {
+          sv = scope_var(fn_nr, arg.name)
+          if (sv != null) att = sv.type_tag
+        }
+        if (att != null) {
+          if (target.name == "is_array") return make_bool(att == "array", expr)
+          if (target.name == "is_text") return make_bool(att == "text", expr)
+          if (target.name == "is_number") return make_bool(att == "number" || att == "integer", expr)
+          if (target.name == "is_integer") return make_bool(att == "integer", expr)
+          if (target.name == "is_function") return make_bool(att == "function", expr)
+          if (target.name == "is_logical") return make_bool(att == "logical", expr)
+          if (target.name == "is_null") return make_bool(att == "null", expr)
+          if (target.name == "is_object") return make_bool(att == "record", expr)
+          if (target.name == "length") {
+            if (att == "array") expr.hint = "array_length"
+            else if (att == "text") expr.hint = "text_length"
+          }
+        }
+      }
      return expr
    }

@@ -525,6 +569,7 @@ var fold = function(ast) {

    if (k == "var" || k == "def") {
      stmt.right = fold_expr(stmt.right, fn_nr)
+      if (is_pure(stmt.right)) stmt.pure = true
      return stmt
    }
    if (k == "var_list") {
--- a/fold.mach
+++ b/fold.mach
--- a/internal/bootstrap.cm
+++ b/internal/bootstrap.cm
@@ -41,11 +41,17 @@ var boot_env = {use: use_basic}
 var tokenize_mod = boot_load("tokenize", boot_env)
 var parse_mod = boot_load("parse", boot_env)
 var fold_mod = boot_load("fold", boot_env)
+use_cache['tokenize'] = tokenize_mod
+use_cache['parse'] = parse_mod
+use_cache['fold'] = fold_mod

 // Optionally load mcode compiler module
 var mcode_mod = null
+var streamline_mod = null
+var qbe_emit_mod = null
 if (use_mcode) {
  mcode_mod = boot_load("mcode", boot_env)
+  use_cache['mcode'] = mcode_mod
 }

 // Warn if any .cm source is newer than its .mach bytecode
@@ -55,6 +61,9 @@ function check_mach_stale() {
    ["parse.cm", "parse.mach"],
    ["fold.cm", "fold.mach"],
    ["mcode.cm", "mcode.mach"],
+    ["streamline.cm", "streamline.mach"],
+    ["qbe.cm", "qbe.mach"],
+    ["qbe_emit.cm", "qbe_emit.mach"],
    ["internal/bootstrap.cm", "internal/bootstrap.mach"],
    ["internal/engine.cm", "internal/engine.mach"]
  ]
@@ -118,26 +127,78 @@ function analyze(src, filename) {
  return ast
 }

+// Load module `name` from core_path: use <name>.mach bytecode if that file exists, otherwise compile <name>.cm from source (no staleness check is done here)
+function load_module(name, env) {
+  var mach_path = core_path + '/' + name + ".mach"
+  var data = null
+  var src_path = null
+  var src = null
+  var ast = null
+  if (fd.is_file(mach_path)) { // bytecode present: load it directly with env
+    data = fd.slurp(mach_path)
+    return mach_load(data, env)
+  }
+  src_path = core_path + '/' + name + ".cm" // no bytecode: fall back to the .cm source
+  src = text(fd.slurp(src_path))
+  ast = analyze(src, src_path) // analyze() is defined earlier in this file
+  return mach_eval_ast(name, json.encode(ast), env) // the AST is handed over JSON-encoded
+}
+
+// Load optimization pipeline modules (needs analyze to be defined)
+var qbe_macros = null
+if (use_mcode) {
+  streamline_mod = load_module("streamline", boot_env)
+  use_cache['streamline'] = streamline_mod
+  if (emit_qbe) {
+    qbe_macros = load_module("qbe", boot_env)
+    qbe_emit_mod = load_module("qbe_emit", boot_env)
+    use_cache['qbe'] = qbe_macros
+    use_cache['qbe_emit'] = qbe_emit_mod
+  }
+}
+
 // Run AST through either mcode or mach pipeline
 function run_ast(name, ast, env) {
  var compiled = null
+  var optimized = null
+  var qbe_il = null
  if (use_mcode) {
    compiled = mcode_mod(ast)
-    return mcode_run(name, json.encode(compiled), env)
+    optimized = streamline_mod(compiled)
+    if (emit_qbe) {
+      qbe_il = qbe_emit_mod(optimized, qbe_macros)
+      print(qbe_il)
+      return null
+    }
+    return mcode_run(name, json.encode(optimized), env)
  }
  return mach_eval_ast(name, json.encode(ast), env)
 }

 // use() with ƿit pipeline for .cm modules
 function use_fn(path) {
-  var file_path = path + '.cm'
+  var file_path = null
+  var mach_path = null
+  var data = null
  var script = null
  var ast = null
  var result = null
  if (use_cache[path])
    return use_cache[path]

-  // Check CWD first, then core_path
+  // Try .mach bytecode first (CWD then core_path)
+  mach_path = path + '.mach'
+  if (!fd.is_file(mach_path))
+    mach_path = core_path + '/' + path + '.mach'
+  if (fd.is_file(mach_path)) {
+    data = fd.slurp(mach_path)
+    result = mach_load(data, {use: use_fn})
+    use_cache[path] = result
+    return result
+  }
+
+  // Try .cm source (CWD then core_path)
+  file_path = path + '.cm'
  if (!fd.is_file(file_path))
    file_path = core_path + '/' + path + '.cm'

--- a/mcode.ce
+++ b/mcode.ce
@@ -2,10 +2,12 @@ var fd = use("fd")
 var json = use("json")
 var tokenize = use("tokenize")
 var parse = use("parse")
+var fold = use("fold")
 var mcode = use("mcode")
 var filename = args[0]
 var src = text(fd.slurp(filename))
 var result = tokenize(src, filename)
-var ast = parse(result.tokens, src, filename)
-var compiled = mcode(ast)
+var ast = parse(result.tokens, src, filename, tokenize)
+var folded = fold(ast)
+var compiled = mcode(folded)
 print(json.encode(compiled))
--- a/mcode.cm
+++ b/mcode.cm
@@ -51,6 +51,13 @@ var mcode = function(ast) {
  var s_cur_col = 0
  var s_filename = null

+  // Shared closure vars for binop helpers (avoids >4 param functions)
+  var _bp_dest = 0
+  var _bp_left = 0
+  var _bp_right = 0
+  var _bp_ln = null
+  var _bp_rn = null
+
  // State save/restore for nested function compilation
  var save_state = function() {
    return {
@@ -260,15 +267,19 @@ var mcode = function(ast) {
  }

  // emit_add_decomposed: int path -> text path -> float path -> disrupt
-  var emit_add_decomposed = function(dest, left, right, left_node, right_node) {
+  // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
+  var emit_add_decomposed = function() {
+    var dest = _bp_dest
+    var left = _bp_left
+    var right = _bp_right
    var t0 = 0
    var t1 = 0
-    var left_is_int = is_known_int(left_node)
-    var left_is_text = is_known_text(left_node)
-    var left_is_num = is_known_number(left_node)
-    var right_is_int = is_known_int(right_node)
-    var right_is_text = is_known_text(right_node)
-    var right_is_num = is_known_number(right_node)
+    var left_is_int = is_known_int(_bp_ln)
+    var left_is_text = is_known_text(_bp_ln)
+    var left_is_num = is_known_number(_bp_ln)
+    var right_is_int = is_known_int(_bp_rn)
+    var right_is_text = is_known_text(_bp_rn)
+    var right_is_num = is_known_number(_bp_rn)
    var not_int = null
    var not_text = null
    var done = null
@@ -346,13 +357,17 @@ var mcode = function(ast) {
  }

  // emit_numeric_binop: int path -> float path -> disrupt
-  var emit_numeric_binop = function(int_op, float_op, dest, left, right, left_node, right_node) {
+  // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
+  var emit_numeric_binop = function(int_op, float_op) {
+    var dest = _bp_dest
+    var left = _bp_left
+    var right = _bp_right
    var t0 = 0
    var t1 = 0
-    var left_is_int = is_known_int(left_node)
-    var left_is_num = is_known_number(left_node)
-    var right_is_int = is_known_int(right_node)
-    var right_is_num = is_known_number(right_node)
+    var left_is_int = is_known_int(_bp_ln)
+    var left_is_num = is_known_number(_bp_ln)
+    var right_is_int = is_known_int(_bp_rn)
+    var right_is_num = is_known_number(_bp_rn)
    var not_int = null
    var done = null
    var err = null
@@ -404,7 +419,11 @@ var mcode = function(ast) {
  }

  // emit_eq_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(false)
-  var emit_eq_decomposed = function(dest, left, right, left_node, right_node) {
+  // reads _bp_dest, _bp_left, _bp_right from closure
+  var emit_eq_decomposed = function() {
+    var dest = _bp_dest
+    var left = _bp_left
+    var right = _bp_right
    var t0 = 0
    var t1 = 0
    var done = gen_label("eq_done")
@@ -472,7 +491,11 @@ var mcode = function(ast) {
  }

  // emit_ne_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(true)
-  var emit_ne_decomposed = function(dest, left, right, left_node, right_node) {
+  // reads _bp_dest, _bp_left, _bp_right from closure
+  var emit_ne_decomposed = function() {
+    var dest = _bp_dest
+    var left = _bp_left
+    var right = _bp_right
    var t0 = 0
    var t1 = 0
    var done = gen_label("ne_done")
@@ -549,15 +572,19 @@ var mcode = function(ast) {
  }

  // emit_relational: int -> float -> text -> disrupt
-  var emit_relational = function(int_op, float_op, text_op, dest, left, right, left_node, right_node) {
+  // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
+  var emit_relational = function(int_op, float_op, text_op) {
+    var dest = _bp_dest
+    var left = _bp_left
+    var right = _bp_right
    var t0 = 0
    var t1 = 0
-    var left_is_int = is_known_int(left_node)
-    var left_is_num = is_known_number(left_node)
-    var left_is_text = is_known_text(left_node)
-    var right_is_int = is_known_int(right_node)
-    var right_is_num = is_known_number(right_node)
-    var right_is_text = is_known_text(right_node)
+    var left_is_int = is_known_int(_bp_ln)
+    var left_is_num = is_known_number(_bp_ln)
+    var left_is_text = is_known_text(_bp_ln)
+    var right_is_int = is_known_int(_bp_rn)
+    var right_is_num = is_known_number(_bp_rn)
+    var right_is_text = is_known_text(_bp_rn)
    var not_int = null
    var not_num = null
    var done = null
@@ -654,29 +681,33 @@ var mcode = function(ast) {
  }

  // Central router: maps op string to decomposition helper
-  var emit_binop = function(op_str, dest, left, right, left_node, right_node) {
+  // Sets _bp_dest/_bp_left/_bp_right, then calls the helper with reduced args; callers must set _bp_ln/_bp_rn before calling
+  var emit_binop = function(op_str, dest, left, right) {
+    _bp_dest = dest
+    _bp_left = left
+    _bp_right = right
    if (op_str == "add") {
-      emit_add_decomposed(dest, left, right, left_node, right_node)
+      emit_add_decomposed()
    } else if (op_str == "subtract") {
-      emit_numeric_binop("sub_int", "sub_float", dest, left, right, left_node, right_node)
+      emit_numeric_binop("sub_int", "sub_float")
    } else if (op_str == "multiply") {
-      emit_numeric_binop("mul_int", "mul_float", dest, left, right, left_node, right_node)
+      emit_numeric_binop("mul_int", "mul_float")
    } else if (op_str == "divide") {
-      emit_numeric_binop("div_int", "div_float", dest, left, right, left_node, right_node)
+      emit_numeric_binop("div_int", "div_float")
    } else if (op_str == "modulo") {
-      emit_numeric_binop("mod_int", "mod_float", dest, left, right, left_node, right_node)
+      emit_numeric_binop("mod_int", "mod_float")
    } else if (op_str == "eq") {
-      emit_eq_decomposed(dest, left, right, left_node, right_node)
+      emit_eq_decomposed()
    } else if (op_str == "ne") {
-      emit_ne_decomposed(dest, left, right, left_node, right_node)
+      emit_ne_decomposed()
    } else if (op_str == "lt") {
-      emit_relational("lt_int", "lt_float", "lt_text", dest, left, right, left_node, right_node)
+      emit_relational("lt_int", "lt_float", "lt_text")
    } else if (op_str == "le") {
-      emit_relational("le_int", "le_float", "le_text", dest, left, right, left_node, right_node)
+      emit_relational("le_int", "le_float", "le_text")
    } else if (op_str == "gt") {
-      emit_relational("gt_int", "gt_float", "gt_text", dest, left, right, left_node, right_node)
+      emit_relational("gt_int", "gt_float", "gt_text")
    } else if (op_str == "ge") {
-      emit_relational("ge_int", "ge_float", "ge_text", dest, left, right, left_node, right_node)
+      emit_relational("ge_int", "ge_float", "ge_text")
    } else {
      // Passthrough for bitwise, pow, in, etc.
      emit_3(op_str, dest, left, right)
@@ -685,19 +716,31 @@ var mcode = function(ast) {
  }

  var emit_get_prop = function(dest, obj, prop) {
-    add_instr(["load", dest, obj, prop])
+    add_instr(["load_field", dest, obj, prop])
  }

  var emit_set_prop = function(obj, prop, val) {
-    add_instr(["store", obj, val, prop])
+    add_instr(["store_field", obj, val, prop])
  }

-  var emit_get_elem = function(dest, obj, idx) {
-    emit_3("load", dest, obj, idx)
+  var emit_get_elem = function(dest, obj, idx, access_kind) {
+    if (access_kind == "index") {
+      emit_3("load_index", dest, obj, idx)
+    } else if (access_kind == "field") {
+      emit_3("load_field", dest, obj, idx)
+    } else {
+      emit_3("load_dynamic", dest, obj, idx)
+    }
  }

-  var emit_set_elem = function(obj, idx, val) {
-    emit_3("store", obj, val, idx)
+  var emit_set_elem = function(obj, idx, val, access_kind) {
+    if (access_kind == "index") {
+      emit_3("store_index", obj, val, idx)
+    } else if (access_kind == "field") {
+      emit_3("store_field", obj, val, idx)
+    } else {
+      emit_3("store_dynamic", obj, val, idx)
+    }
  }

  var emit_call = function(dest, func_slot, args) {
@@ -718,23 +761,37 @@ var mcode = function(ast) {
  }

  var emit_call_method = function(dest, obj, prop, args) {
-    var instr = ["callmethod", dest, obj, prop]
+    var method_slot = alloc_slot()
+    add_instr(["load_field", method_slot, obj, prop])
+    var argc = length(args)
+    var frame_slot = alloc_slot()
+    emit_3("frame", frame_slot, method_slot, argc)
+    emit_3("setarg", frame_slot, 0, obj)
+    var arg_idx = 1
    var _i = 0
-    while (_i < length(args)) {
-      push(instr, args[_i])
+    while (_i < argc) {
+      emit_3("setarg", frame_slot, arg_idx, args[_i])
+      arg_idx = arg_idx + 1
      _i = _i + 1
    }
-    add_instr(instr)
+    emit_2("invoke", frame_slot, dest)
  }

  var emit_call_method_dyn = function(dest, obj, key_reg, args) {
-    var instr = ["callmethod_dyn", dest, obj, key_reg]
+    var method_slot = alloc_slot()
+    emit_3("load_dynamic", method_slot, obj, key_reg)
+    var argc = length(args)
+    var frame_slot = alloc_slot()
+    emit_3("frame", frame_slot, method_slot, argc)
+    emit_3("setarg", frame_slot, 0, obj)
+    var arg_idx = 1
    var _i = 0
-    while (_i < length(args)) {
-      push(instr, args[_i])
+    while (_i < argc) {
+      emit_3("setarg", frame_slot, arg_idx, args[_i])
+      arg_idx = arg_idx + 1
      _i = _i + 1
    }
-    add_instr(instr)
+    emit_2("invoke", frame_slot, dest)
  }

  var emit_go_call = function(func_slot, args) {
@@ -920,7 +977,9 @@ var mcode = function(ast) {
    if (op == null) {
      op = "add"
    }
-    emit_binop(op, dest, left_slot, right_slot, left, right)
+    _bp_ln = left
+    _bp_rn = right
+    emit_binop(op, dest, left_slot, right_slot)
    return dest
  }

@@ -972,7 +1031,9 @@ var mcode = function(ast) {
      }
      right_slot = gen_expr(right, -1)
      dest = alloc_slot()
-      emit_binop(op, dest, left_slot, right_slot, null, right)
+      _bp_ln = null
+      _bp_rn = right
+      emit_binop(op, dest, left_slot, right_slot)
      if (level == 0) {
        local = find_var(name)
        if (local >= 0) {
@@ -995,7 +1056,9 @@ var mcode = function(ast) {
      emit_get_prop(old_val, obj_slot, prop)
      right_slot = gen_expr(right, -1)
      dest = alloc_slot()
-      emit_binop(op, dest, old_val, right_slot, null, right)
+      _bp_ln = null
+      _bp_rn = right
+      emit_binop(op, dest, old_val, right_slot)
      emit_set_prop(obj_slot, prop, dest)
      return dest
    } else if (left_kind == "[") {
@@ -1004,11 +1067,13 @@ var mcode = function(ast) {
      obj_slot = gen_expr(obj, -1)
      idx_slot = gen_expr(idx_expr, -1)
      old_val = alloc_slot()
-      emit_get_elem(old_val, obj_slot, idx_slot)
+      emit_get_elem(old_val, obj_slot, idx_slot, left.access_kind)
      right_slot = gen_expr(right, -1)
      dest = alloc_slot()
-      emit_binop(op, dest, old_val, right_slot, null, right)
-      emit_set_elem(obj_slot, idx_slot, dest)
+      _bp_ln = null
+      _bp_rn = right
+      emit_binop(op, dest, old_val, right_slot)
+      emit_set_elem(obj_slot, idx_slot, dest, left.access_kind)
      return dest
    }
    return -1
@@ -1081,7 +1146,7 @@ var mcode = function(ast) {
      idx_expr = left.right
      obj_slot = gen_expr(obj, -1)
      idx_slot = gen_expr(idx_expr, -1)
-      emit_set_elem(obj_slot, idx_slot, val_slot)
+      emit_set_elem(obj_slot, idx_slot, val_slot, left.access_kind)
    }
    return val_slot
  }
@@ -1301,7 +1366,7 @@ var mcode = function(ast) {
      obj_slot = gen_expr(obj, -1)
      idx_slot = gen_expr(idx, -1)
      slot = alloc_slot()
-      emit_get_elem(slot, obj_slot, idx_slot)
+      emit_get_elem(slot, obj_slot, idx_slot, expr.access_kind)
      return slot
    }

@@ -1357,7 +1422,9 @@ var mcode = function(ast) {
        a0 = gen_expr(args_list[0], -1)
        a1 = gen_expr(args_list[1], -1)
        d = alloc_slot()
-        emit_binop(mop, d, a0, a1, args_list[0], args_list[1])
+        _bp_ln = args_list[0]
+        _bp_rn = args_list[1]
+        emit_binop(mop, d, a0, a1)
        return d
      }

@@ -1442,7 +1509,9 @@ var mcode = function(ast) {
          emit_access_intrinsic(old_slot, name)
        }
        new_slot = alloc_slot()
-        emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node)
+        _bp_ln = null
+        _bp_rn = one_node
+        emit_binop(arith_op, new_slot, old_slot, one_slot)
        if (level == 0) {
          local = find_var(name)
          if (local >= 0) {
@@ -1462,7 +1531,9 @@ var mcode = function(ast) {
        old_slot = alloc_slot()
        emit_get_prop(old_slot, obj_slot, prop)
        new_slot = alloc_slot()
-        emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node)
+        _bp_ln = null
+        _bp_rn = one_node
+        emit_binop(arith_op, new_slot, old_slot, one_slot)
        emit_set_prop(obj_slot, prop, new_slot)
        return postfix ? old_slot : new_slot
      } else if (operand_kind == "[") {
@@ -1471,10 +1542,12 @@ var mcode = function(ast) {
        obj_slot = gen_expr(obj, -1)
        idx_slot = gen_expr(idx_expr, -1)
        old_slot = alloc_slot()
-        emit_get_elem(old_slot, obj_slot, idx_slot)
+        emit_get_elem(old_slot, obj_slot, idx_slot, operand.access_kind)
        new_slot = alloc_slot()
-        emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node)
-        emit_set_elem(obj_slot, idx_slot, new_slot)
+        _bp_ln = null
+        _bp_rn = one_node
+        emit_binop(arith_op, new_slot, old_slot, one_slot)
+        emit_set_elem(obj_slot, idx_slot, new_slot, operand.access_kind)
        return postfix ? old_slot : new_slot
      }
    }
@@ -1911,7 +1984,9 @@ var mcode = function(ast) {
          case_expr = case_node.expression
          case_val = gen_expr(case_expr, -1)
          cmp_slot = alloc_slot()
-          emit_binop("eq", cmp_slot, switch_val, case_val, null, case_expr)
+          _bp_ln = null
+          _bp_rn = case_expr
+          emit_binop("eq", cmp_slot, switch_val, case_val)
          emit_jump_cond("jump_true", cmp_slot, case_label)
          push(case_labels, case_label)
        }
--- a/mcode.mach
+++ b/mcode.mach
--- a/parse.ce
+++ b/parse.ce
@@ -1,8 +1,9 @@
 var fd = use("fd")
+var json = use("json")
 var tokenize = use("tokenize")
 var parse = use("parse")
 var filename = args[0]
 var src = text(fd.slurp(filename))
 var result = tokenize(src, filename)
-var ast = parse(result.tokens, src, filename)
+var ast = parse(result.tokens, src, filename, tokenize)
 print(json.encode(ast))
--- a/parse.cm
+++ b/parse.cm
@@ -1493,6 +1493,22 @@ var parse = function(tokens, src, filename, tokenizer) {
    return functino_names[name] == true
  }

+  var derive_type_tag = function(expr) {
+    if (expr == null) return null
+    var k = expr.kind
+    if (k == "array") return "array"
+    if (k == "record") return "record"
+    if (k == "function") return "function"
+    if (k == "text" || k == "text literal") return "text"
+    if (k == "number") {
+      if (is_integer(expr.number)) return "integer"
+      return "number"
+    }
+    if (k == "true" || k == "false") return "logical"
+    if (k == "null") return "null"
+    return null
+  }
+
  var _assign_kinds = {
    assign: true, "+=": true, "-=": true, "*=": true, "/=": true, "%=": true,
    "<<=": true, ">>=": true, ">>>=": true,
@@ -1517,7 +1533,8 @@ var parse = function(tokens, src, filename, tokenizer) {
        function_nr: v.function_nr,
        nr_uses: v.nr_uses,
        closure: v.closure == 1,
-        level: 0
+        level: 0,
+        type_tag: v.type_tag
      }
      slots = slots + 1
      if (v.closure) close_slots = close_slots + 1
@@ -1650,13 +1667,26 @@ var parse = function(tokens, src, filename, tokenizer) {
      return null
    }

+    if (kind == "[") {
+      sem_check_expr(scope, expr.left)
+      sem_check_expr(scope, expr.right)
+      if (expr.right != null) {
+        if (expr.right.kind == "number" && is_integer(expr.right.number)) {
+          expr.access_kind = "index"
+        } else if (expr.right.kind == "text") {
+          expr.access_kind = "field"
+        }
+      }
+      return null
+    }
+
    if (kind == "," || kind == "+" || kind == "-" || kind == "*" ||
        kind == "/" || kind == "%" || kind == "==" || kind == "!=" ||
        kind == "<" || kind == ">" || kind == "<=" || kind == ">=" ||
        kind == "&&" || kind == "||" || kind == "&" ||
        kind == "|" || kind == "^" || kind == "<<" || kind == ">>" ||
        kind == ">>>" || kind == "**" || kind == "in" ||
-        kind == "." || kind == "[") {
+        kind == ".") {
      sem_check_expr(scope, expr.left)
      sem_check_expr(scope, expr.right)
      return null
@@ -1765,6 +1795,7 @@ var parse = function(tokens, src, filename, tokenizer) {
          if (r.level > 0) r.v.closure = 1
        } else {
          expr.level = -1
+          expr.intrinsic = true
          sem_add_intrinsic(name)
        }
      }
@@ -1788,6 +1819,7 @@ var parse = function(tokens, src, filename, tokenizer) {
    var pname = null
    var def_val = null
    var sr = null
+    var tt = null

    if (kind == "var_list") {
      i = 0
@@ -1827,6 +1859,13 @@ var parse = function(tokens, src, filename, tokenizer) {
        }
      }
      sem_check_expr(scope, stmt.right)
+      if (name != null) {
+        tt = derive_type_tag(stmt.right)
+        if (tt != null) {
+          existing = sem_find_var(scope, name)
+          if (existing != null) existing.type_tag = tt
+        }
+      }
      return null
    }

@@ -1904,6 +1943,9 @@ var parse = function(tokens, src, filename, tokenizer) {

    if (kind == "return" || kind == "go") {
      sem_check_expr(scope, stmt.expression)
+      if (stmt.expression != null && stmt.expression.kind == "(") {
+        stmt.tail = true
+      }
      return null
    }

--- a/qbe.ce
+++ b/qbe.ce
@@ -0,0 +1,18 @@
+var fd = use("fd")
+var json = use("json")
+var tokenize = use("tokenize")
+var parse = use("parse")
+var fold = use("fold")
+var mcode = use("mcode")
+var streamline = use("streamline")
+var qbe_macros = use("qbe")
+var qbe_emit = use("qbe_emit")
+var filename = args[0]
+var src = text(fd.slurp(filename))
+var result = tokenize(src, filename)
+var ast = parse(result.tokens, src, filename, tokenize)
+var folded = fold(ast)
+var compiled = mcode(folded)
+var optimized = streamline(compiled)
+var il = qbe_emit(optimized, qbe_macros)
+print(il)
--- a/qbe.cm
+++ b/qbe.cm
@@ -13,6 +13,11 @@ def js_true = 35
 def js_exception = 15
 def js_empty_text = 27

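+// Shared closure vars for functions with >4 params: _qop = QBE op name or float-compare op id, _qop2 = eq_only flag for cmp_text, _qflags = option record read by cmp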
+var _qop = null
+var _qop2 = null
+var _qflags = null
+
 def int32_min = -2147483648
 def int32_max = 2147483647
 def mantissa_mask = 4503599627370495
@@ -398,18 +403,20 @@ var mod = function(p, ctx, a, b) {
 // ============================================================

 // Helper: generate comparison for a given op string and int comparison QBE op
-// null_true: whether null==null returns true (eq, le, ge) or false (ne, lt, gt)
-var cmp = function(p, ctx, a, b, int_cmp_op, float_cmp_op_id, is_eq, is_ne, null_true) {
+// reads _qflags = {int_cmp_op, float_id, is_eq, is_ne, null_true} from closure
+var cmp = function(p, ctx, a, b) {
+  var int_cmp_op = _qflags.int_cmp_op
+  var float_cmp_op_id = _qflags.float_id
  var eq_only = 0
-  if (is_eq || is_ne) {
+  var mismatch_val = js_false
+  var null_val = js_false
+  if (_qflags.is_eq || _qflags.is_ne) {
    eq_only = 1
  }
-  var mismatch_val = js_false
-  if (is_ne) {
+  if (_qflags.is_ne) {
    mismatch_val = js_true
  }
-  var null_val = js_false
-  if (null_true) {
+  if (_qflags.null_true) {
    null_val = js_true
  }
  return `@${p}.start
@@ -485,27 +492,33 @@ var cmp = function(p, ctx, a, b, int_cmp_op, float_cmp_op_id, is_eq, is_ne, null
 // MACH_EQ=0, NEQ=1, LT=2, LE=3, GT=4, GE=5
 // null_true: eq, le, ge return true for null==null; ne, lt, gt return false
 var eq = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "ceqw", 0, true, false, true)
+  _qflags = {int_cmp_op: "ceqw", float_id: 0, is_eq: true, is_ne: false, null_true: true}
+  return cmp(p, ctx, a, b)
 }
 
 var ne = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "cnew", 1, false, true, false)
+  _qflags = {int_cmp_op: "cnew", float_id: 1, is_eq: false, is_ne: true, null_true: false}
+  return cmp(p, ctx, a, b)
 }
 
 var lt = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "csltw", 2, false, false, false)
+  _qflags = {int_cmp_op: "csltw", float_id: 2, is_eq: false, is_ne: false, null_true: false}
+  return cmp(p, ctx, a, b)
 }
 
 var le = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "cslew", 3, false, false, true)
+  _qflags = {int_cmp_op: "cslew", float_id: 3, is_eq: false, is_ne: false, null_true: true}
+  return cmp(p, ctx, a, b)
 }
 
 var gt = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "csgtw", 4, false, false, false)
+  _qflags = {int_cmp_op: "csgtw", float_id: 4, is_eq: false, is_ne: false, null_true: false}
+  return cmp(p, ctx, a, b)
 }
 
 var ge = function(p, ctx, a, b) {
-  return cmp(p, ctx, a, b, "csgew", 5, false, false, true)
+  _qflags = {int_cmp_op: "csgew", float_id: 5, is_eq: false, is_ne: false, null_true: true}
+  return cmp(p, ctx, a, b)
 }

 // ============================================================
@@ -627,7 +639,9 @@ var bnot = function(p, ctx, v) {
 // Both operands must be numeric. Int fast path, float -> convert to int32.
 // ============================================================

-var bitwise_op = function(p, ctx, a, b, qbe_op) {
+// reads _qop from closure
+var bitwise_op = function(p, ctx, a, b) {
+  var qbe_op = _qop
  return `@${p}.start
  %${p}.at =l and ${a}, 1
  %${p}.bt =l and ${b}, 1
@@ -654,19 +668,24 @@ var bitwise_op = function(p, ctx, a, b, qbe_op) {
 }

 var band = function(p, ctx, a, b) {
-  return bitwise_op(p, ctx, a, b, "and")
+  _qop = "and"
+  return bitwise_op(p, ctx, a, b)
 }

 var bor = function(p, ctx, a, b) {
-  return bitwise_op(p, ctx, a, b, "or")
+  _qop = "or"
+  return bitwise_op(p, ctx, a, b)
 }

 var bxor = function(p, ctx, a, b) {
-  return bitwise_op(p, ctx, a, b, "xor")
+  _qop = "xor"
+  return bitwise_op(p, ctx, a, b)
 }

 // Shift ops: mask shift amount to 5 bits (& 31)
-var shift_op = function(p, ctx, a, b, qbe_op) {
+// reads _qop from closure
+var shift_op = function(p, ctx, a, b) {
+  var qbe_op = _qop
  return `@${p}.start
  %${p}.at =l and ${a}, 1
  %${p}.bt =l and ${b}, 1
@@ -694,15 +713,18 @@ var shift_op = function(p, ctx, a, b, qbe_op) {
 }

 var shl = function(p, ctx, a, b) {
-  return shift_op(p, ctx, a, b, "shl")
+  _qop = "shl"
+  return shift_op(p, ctx, a, b)
 }

 var shr = function(p, ctx, a, b) {
-  return shift_op(p, ctx, a, b, "sar")
+  _qop = "sar"
+  return shift_op(p, ctx, a, b)
 }

 var ushr = function(p, ctx, a, b) {
-  return shift_op(p, ctx, a, b, "shr")
+  _qop = "shr"
+  return shift_op(p, ctx, a, b)
 }

 // ============================================================
@@ -898,7 +920,9 @@ var gt_int = function(p, ctx, a, b) { return cmp_int(p, a, b, "csgtw") }
 var ge_int = function(p, ctx, a, b) { return cmp_int(p, a, b, "csgew") }

 // --- Comparisons (float path) ---
-var cmp_float = function(p, ctx, a, b, op_id) {
+// reads _qop from closure (op_id)
+var cmp_float = function(p, ctx, a, b) {
+  var op_id = _qop
  return `  %${p}.fcr =w call $qbe_float_cmp(l ${ctx}, w ${op_id}, l ${a}, l ${b})
  %${p}.fcrext =l extuw %${p}.fcr
  %${p}.fsh =l shl %${p}.fcrext, 5
@@ -906,15 +930,18 @@ var cmp_float = function(p, ctx, a, b, op_id) {
 `
 }

-var eq_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 0) }
-var ne_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 1) }
-var lt_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 2) }
-var le_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 3) }
-var gt_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 4) }
-var ge_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 5) }
+var eq_float = function(p, ctx, a, b) { _qop = 0; return cmp_float(p, ctx, a, b) }
+var ne_float = function(p, ctx, a, b) { _qop = 1; return cmp_float(p, ctx, a, b) }
+var lt_float = function(p, ctx, a, b) { _qop = 2; return cmp_float(p, ctx, a, b) }
+var le_float = function(p, ctx, a, b) { _qop = 3; return cmp_float(p, ctx, a, b) }
+var gt_float = function(p, ctx, a, b) { _qop = 4; return cmp_float(p, ctx, a, b) }
+var ge_float = function(p, ctx, a, b) { _qop = 5; return cmp_float(p, ctx, a, b) }

 // --- Comparisons (text path) ---
-var cmp_text = function(p, ctx, a, b, qbe_op, eq_only) {
+// reads _qop (qbe_op) and _qop2 (eq_only) from closure
+var cmp_text = function(p, ctx, a, b) {
+  var qbe_op = _qop
+  var eq_only = _qop2
  return `  %${p}.scmp =w call $js_string_compare_value(l ${ctx}, l ${a}, l ${b}, w ${eq_only})
  %${p}.tcr =w ${qbe_op} %${p}.scmp, 0
  %${p}.tcrext =l extuw %${p}.tcr
@@ -923,12 +950,12 @@ var cmp_text = function(p, ctx, a, b, qbe_op, eq_only) {
 `
 }

-var eq_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "ceqw", 1) }
-var ne_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "cnew", 1) }
-var lt_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csltw", 0) }
-var le_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "cslew", 0) }
-var gt_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csgtw", 0) }
-var ge_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csgew", 0) }
+var eq_text = function(p, ctx, a, b) { _qop = "ceqw"; _qop2 = 1; return cmp_text(p, ctx, a, b) }
+var ne_text = function(p, ctx, a, b) { _qop = "cnew"; _qop2 = 1; return cmp_text(p, ctx, a, b) }
+var lt_text = function(p, ctx, a, b) { _qop = "csltw"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
+var le_text = function(p, ctx, a, b) { _qop = "cslew"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
+var gt_text = function(p, ctx, a, b) { _qop = "csgtw"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
+var ge_text = function(p, ctx, a, b) { _qop = "csgew"; _qop2 = 0; return cmp_text(p, ctx, a, b) }

 // --- Comparisons (bool path) ---
 var eq_bool = function(p, a, b) {
--- a/qbe.mach
+++ b/qbe.mach
--- a/qbe_emit.cm
+++ b/qbe_emit.cm
@@ -0,0 +1,667 @@
+// qbe_emit.cm — mcode IR → QBE IL compiler
+// Takes mcode IR (from mcode.cm) and uses qbe.cm macros to produce
+// a complete QBE IL program ready for the qbe compiler.
+// qbe module is passed via env as 'qbe'
+
+var qbe_emit = function(ir, qbe) {
+  var out = []
+  var data_out = []
+  var str_table = {}
+  var str_id = 0
+  var uid = 0
+
+  // ============================================================
+  // Output helpers
+  // ============================================================
+
+  // Append one chunk of QBE IL text to the function-body buffer `out`.
+  // The parameter is named `s` and therefore shadows the slot-name
+  // helper `s` inside this function only.
+  var emit = function(s) {
+    push(out, s)
+  }
+
+  // Return a new unique name ("u1", "u2", ...) for temporaries and
+  // local labels. The counter lives in qbe_emit's scope and is never
+  // reset, so names are unique across all compiled functions.
+  var fresh = function() {
+    uid = uid + 1
+    return "u" + text(uid)
+  }
+
+  // Name of the QBE temporary that mirrors mcode slot n, e.g. s(3) is "%s3".
+  var s = function(n) {
+    return "%s" + text(n)
+  }
+
+  // Rewrite an mcode label so it can be used as a QBE label or symbol
+  // name: '.', '-', ' ' and '/' become '_'; '<', '>', '(' and ')' are
+  // removed.
+  var sanitize = function(lbl) {
+    var to_underscore = [".", "-", " ", "/"]
+    var to_strip = ["<", ">", "(", ")"]
+    var cleaned = lbl
+    var k = 0
+    while (k < length(to_underscore)) {
+      cleaned = replace(cleaned, to_underscore[k], "_")
+      k = k + 1
+    }
+    k = 0
+    while (k < length(to_strip)) {
+      cleaned = replace(cleaned, to_strip[k], "")
+      k = k + 1
+    }
+    return cleaned
+  }
+
+  // ============================================================
+  // String interning — emit data section entries
+  // ============================================================
+
+  // Intern a string constant and return the label of its data definition.
+  // The first request for a given value appends a NUL-terminated `data`
+  // line to data_out; later requests return the cached label.
+  var intern_str = function(val) {
+    if (str_table[val] != null) return str_table[val]
+    var label = "$d_str_" + text(str_id)
+    str_id = str_id + 1
+    // Escape backslashes first so the escapes added below are not doubled.
+    var escaped = replace(val, "\\", "\\\\")
+    escaped = replace(escaped, "\"", "\\\"")
+    // A raw line break inside the quoted literal would split the data
+    // definition across several output lines; emit the two-character
+    // escape \n instead so every definition stays on one line.
+    escaped = replace(escaped, "\n", "\\n")
+    var line = "data " + label + ' = ' + '{ b "' + escaped + '", b 0 }'
+    push(data_out, line)
+    str_table[val] = label
+    return label
+  }
+
+  // ============================================================
+  // Extract property name from mcode operand
+  // ============================================================
+
+  // Resolve a static property name from an mcode operand.
+  // A text operand is the name itself; an object operand contributes its
+  // `name` field or, failing that, its `value` field. Returns null when
+  // no static name is available.
+  var prop_name = function(a) {
+    if (is_text(a)) return a
+    if (!is_object(a)) return null
+    if (a.name != null) return a.name
+    if (a.value != null) return a.value
+    return null
+  }
+
+  // ============================================================
+  // Compile one function's instructions
+  // ============================================================
+
+  // Emit one QBE function for an mcode function record.
+  //   fn      — record providing `instructions`, `nr_slots` and `nr_args`
+  //   fn_idx  — index used to build the symbol name "cell_fn_<idx>"
+  //   is_main — when true the symbol is "cell_main" and fn_idx is ignored
+  // Output is appended to `out` through emit(); nothing is returned.
+  var compile_fn = function(fn, fn_idx, is_main) {
+    var instrs = fn.instructions
+    var nr_slots = fn.nr_slots
+    // nr_args is read but not used anywhere in this function.
+    var nr_args = fn.nr_args
+    var name = is_main ? "cell_main" : "cell_fn_" + text(fn_idx)
+    name = sanitize(name)
+    // Scratch variables shared by the instruction handlers below
+    // (a4 is declared but never assigned or read).
+    var i = 0
+    var instr = null
+    var op = null
+    var a1 = null
+    var a2 = null
+    var a3 = null
+    var a4 = null
+    var p = null
+    var pn = null
+    var sl = null
+    var fop_id = 0
+
+    // Function signature: (ctx, frame_ptr) → JSValue
+    emit(`export function l $${name}(l %ctx, l %fp) {`)
+    emit("@entry")
+
+    // Load all slots from frame into SSA variables
+    // Each slot is a JSValue (8 bytes) at fp + slot*8
+    // Slots are only read from the frame here; no handler in this
+    // function emits a store back to %fp.
+    var off = 0
+    i = 0
+    while (i < nr_slots) {
+      off = i * 8
+      emit(`  %p${text(i)} =l add %fp, ${text(off)}`)
+      emit(`  ${s(i)} =l loadl %p${text(i)}`)
+      i = i + 1
+    }
+
+    // Walk instructions
+    i = 0
+    while (i < length(instrs)) {
+      instr = instrs[i]
+      i = i + 1
+
+      // Labels are plain strings
+      if (is_text(instr)) {
+        emit("@" + sanitize(instr))
+        continue
+      }
+
+      op = instr[0]
+      a1 = instr[1]
+      a2 = instr[2]
+      a3 = instr[3]
+
+      // --- Constants ---
+
+      if (op == "int") {
+        emit(`  ${s(a1)} =l copy ${text(a2 * 2)}`)
+        continue
+      }
+      if (op == "null") {
+        emit(`  ${s(a1)} =l copy ${text(qbe.js_null)}`)
+        continue
+      }
+      if (op == "true") {
+        emit(`  ${s(a1)} =l copy ${text(qbe.js_true)}`)
+        continue
+      }
+      if (op == "false") {
+        emit(`  ${s(a1)} =l copy ${text(qbe.js_false)}`)
+        continue
+      }
+      if (op == "access") {
+        if (is_number(a2)) {
+          if (is_integer(a2)) {
+            emit(`  ${s(a1)} =l copy ${text(a2 * 2)}`)
+          } else {
+            emit(`  ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2)})`)
+          }
+        } else if (is_text(a2)) {
+          sl = intern_str(a2)
+          emit(`  ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`)
+        } else if (is_object(a2)) {
+          if (a2.make == "intrinsic") {
+            sl = intern_str(a2.name)
+            emit(`  ${s(a1)} =l call $cell_rt_get_intrinsic(l %ctx, l ${sl})`)
+          } else if (a2.kind == "number") {
+            if (a2.number != null && is_integer(a2.number)) {
+              emit(`  ${s(a1)} =l copy ${text(a2.number * 2)}`)
+            } else if (a2.number != null) {
+              emit(`  ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2.number)})`)
+            } else {
+              emit(`  ${s(a1)} =l copy ${text(qbe.js_null)}`)
+            }
+          } else if (a2.kind == "text") {
+            sl = intern_str(a2.value)
+            emit(`  ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`)
+          } else if (a2.kind == "true") {
+            emit(`  ${s(a1)} =l copy ${text(qbe.js_true)}`)
+          } else if (a2.kind == "false") {
+            emit(`  ${s(a1)} =l copy ${text(qbe.js_false)}`)
+          } else if (a2.kind == "null") {
+            emit(`  ${s(a1)} =l copy ${text(qbe.js_null)}`)
+          } else {
+            emit(`  ${s(a1)} =l copy ${text(qbe.js_null)}`)
+          }
+        } else {
+          emit(`  ${s(a1)} =l copy ${text(qbe.js_null)}`)
+        }
+        continue
+      }
+
+      // --- Movement ---
+
+      if (op == "move") {
+        emit(`  ${s(a1)} =l copy ${s(a2)}`)
+        continue
+      }
+
+      // --- Arithmetic (int path) — use qbe.cm macros ---
+
+      if (op == "add_int") {
+        p = fresh()
+        emit(qbe.add_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "sub_int") {
+        p = fresh()
+        emit(qbe.sub_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "mul_int") {
+        p = fresh()
+        emit(qbe.mul_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "div_int") {
+        p = fresh()
+        emit(qbe.div_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "mod_int") {
+        p = fresh()
+        emit(qbe.mod_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- Arithmetic (float path) ---
+
+      if (op == "add_float") {
+        p = fresh()
+        emit(qbe.add_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "sub_float") {
+        p = fresh()
+        emit(qbe.sub_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "mul_float") {
+        p = fresh()
+        emit(qbe.mul_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "div_float") {
+        p = fresh()
+        emit(qbe.div_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "mod_float") {
+        p = fresh()
+        emit(qbe.mod_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- String concat ---
+
+      if (op == "concat") {
+        p = fresh()
+        emit(qbe.concat(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- Type checks — use qbe.cm macros ---
+
+      if (op == "is_int") {
+        p = fresh()
+        emit(qbe.is_int(p, s(a2)))
+        emit(qbe.new_bool(p + ".r", "%" + p))
+        emit(`  ${s(a1)} =l copy %${p}.r`)
+        continue
+      }
+      if (op == "is_text") {
+        p = fresh()
+        emit(qbe.is_imm_text(p, s(a2)))
+        emit(qbe.new_bool(p + ".r", "%" + p))
+        emit(`  ${s(a1)} =l copy %${p}.r`)
+        continue
+      }
+      if (op == "is_num") {
+        p = fresh()
+        emit(qbe.is_number(p, s(a2)))
+        emit(qbe.new_bool(p + ".r", "%" + p))
+        emit(`  ${s(a1)} =l copy %${p}.r`)
+        continue
+      }
+      if (op == "is_bool") {
+        p = fresh()
+        emit(qbe.is_bool(p, s(a2)))
+        emit(qbe.new_bool(p + ".r", "%" + p))
+        emit(`  ${s(a1)} =l copy %${p}.r`)
+        continue
+      }
+      if (op == "is_null") {
+        p = fresh()
+        emit(qbe.is_null(p, s(a2)))
+        emit(qbe.new_bool(p + ".r", "%" + p))
+        emit(`  ${s(a1)} =l copy %${p}.r`)
+        continue
+      }
+      if (op == "is_identical") {
+        p = fresh()
+        emit(qbe.is_identical(p, s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- Comparisons (int path) ---
+
+      if (op == "eq_int") {
+        p = fresh()
+        emit(qbe.eq_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_int") {
+        p = fresh()
+        emit(qbe.ne_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "lt_int") {
+        p = fresh()
+        emit(qbe.lt_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "gt_int") {
+        p = fresh()
+        emit(qbe.gt_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "le_int") {
+        p = fresh()
+        emit(qbe.le_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ge_int") {
+        p = fresh()
+        emit(qbe.ge_int(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- Comparisons (float/text/bool) ---
+
+      if (op == "eq_float") {
+        p = fresh()
+        emit(qbe.eq_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_float") {
+        p = fresh()
+        emit(qbe.ne_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "lt_float" || op == "gt_float" || op == "le_float" || op == "ge_float") {
+        // Dispatch to the per-operator qbe macros, as eq_float/ne_float do.
+        // cmp_float takes no operator argument: it reads the operator id
+        // from state that only the lt/le/gt/ge wrappers set, so it must not
+        // be called directly from here (and it is not a name in this module).
+        p = fresh()
+        if (op == "lt_float") emit(qbe.lt_float(p, "%ctx", s(a2), s(a3)))
+        else if (op == "le_float") emit(qbe.le_float(p, "%ctx", s(a2), s(a3)))
+        else if (op == "gt_float") emit(qbe.gt_float(p, "%ctx", s(a2), s(a3)))
+        else emit(qbe.ge_float(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "eq_text") {
+        p = fresh()
+        emit(qbe.eq_text(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_text") {
+        p = fresh()
+        emit(qbe.ne_text(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      // Ordered text comparisons call the cell_rt_<op> runtime helper
+      // directly instead of going through a qbe macro like eq_text/ne_text.
+      // NOTE(review): `p` is allocated but never used here, and the
+      // cell_rt_*_text helpers in qbe_rt.c are stubs that return null —
+      // confirm whether qbe.lt_text etc. were meant to be used instead.
+      if (op == "lt_text" || op == "gt_text" || op == "le_text" || op == "ge_text") {
+        p = fresh()
+        emit(`  ${s(a1)} =l call $cell_rt_${op}(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "eq_bool") {
+        p = fresh()
+        emit(qbe.eq_bool(p, s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ne_bool") {
+        p = fresh()
+        emit(qbe.ne_bool(p, s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "eq_tol" || op == "ne_tol") {
+        emit(`  ${s(a1)} =l call $cell_rt_${op}(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+
+      // --- Boolean ops ---
+
+      if (op == "not") {
+        p = fresh()
+        emit(qbe.lnot(p, "%ctx", s(a2)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "and") {
+        emit(`  ${s(a1)} =l and ${s(a2)}, ${s(a3)}`)
+        continue
+      }
+      if (op == "or") {
+        emit(`  ${s(a1)} =l or ${s(a2)}, ${s(a3)}`)
+        continue
+      }
+
+      // --- Bitwise ops — use qbe.cm macros ---
+
+      if (op == "bitnot") {
+        p = fresh()
+        emit(qbe.bnot(p, "%ctx", s(a2)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "bitand") {
+        p = fresh()
+        emit(qbe.band(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "bitor") {
+        p = fresh()
+        emit(qbe.bor(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "bitxor") {
+        p = fresh()
+        emit(qbe.bxor(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "shl") {
+        p = fresh()
+        emit(qbe.shl(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "shr") {
+        p = fresh()
+        emit(qbe.shr(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+      if (op == "ushr") {
+        p = fresh()
+        emit(qbe.ushr(p, "%ctx", s(a2), s(a3)))
+        emit(`  ${s(a1)} =l copy %${p}`)
+        continue
+      }
+
+      // --- Property access — runtime calls ---
+
+      if (op == "load_field") {
+        pn = prop_name(a3)
+        if (pn != null) {
+          sl = intern_str(pn)
+          emit(`  ${s(a1)} =l call $cell_rt_load_field(l %ctx, l ${s(a2)}, l ${sl})`)
+        } else {
+          emit(`  ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        }
+        continue
+      }
+      if (op == "load_index") {
+        emit(`  ${s(a1)} =l call $cell_rt_load_index(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "load_dynamic") {
+        emit(`  ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "store_field") {
+        pn = prop_name(a3)
+        if (pn != null) {
+          sl = intern_str(pn)
+          emit(`  call $cell_rt_store_field(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${sl})`)
+        } else {
+          emit(`  call $cell_rt_store_dynamic(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`)
+        }
+        continue
+      }
+      if (op == "store_index") {
+        emit(`  call $cell_rt_store_index(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "store_dynamic") {
+        emit(`  call $cell_rt_store_dynamic(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+
+      // --- Closure access ---
+
+      if (op == "get") {
+        emit(`  ${s(a1)} =l call $cell_rt_get_closure(l %ctx, l %fp, l ${text(a2)}, l ${text(a3)})`)
+        continue
+      }
+      if (op == "put") {
+        emit(`  call $cell_rt_put_closure(l %ctx, l %fp, l ${s(a1)}, l ${text(a2)}, l ${text(a3)})`)
+        continue
+      }
+
+      // --- Control flow ---
+
+      if (op == "jump") {
+        emit(`  jmp @${sanitize(a1)}`)
+        continue
+      }
+      if (op == "jump_true") {
+        p = fresh()
+        emit(`  %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
+        emit(`  jnz %${p}, @${sanitize(a2)}, @${p}_f`)
+        emit(`@${p}_f`)
+        continue
+      }
+      if (op == "jump_false") {
+        p = fresh()
+        emit(`  %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
+        emit(`  jnz %${p}, @${p}_t, @${sanitize(a2)}`)
+        emit(`@${p}_t`)
+        continue
+      }
+      if (op == "jump_null") {
+        p = fresh()
+        emit(`  %${p} =w ceql ${s(a1)}, ${text(qbe.js_null)}`)
+        emit(`  jnz %${p}, @${sanitize(a2)}, @${p}_nn`)
+        emit(`@${p}_nn`)
+        continue
+      }
+      if (op == "jump_not_null") {
+        p = fresh()
+        emit(`  %${p} =w cnel ${s(a1)}, ${text(qbe.js_null)}`)
+        emit(`  jnz %${p}, @${sanitize(a2)}, @${p}_n`)
+        emit(`@${p}_n`)
+        continue
+      }
+      if (op == "wary_true") {
+        p = fresh()
+        emit(`  %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
+        emit(`  jnz %${p}, @${sanitize(a2)}, @${p}_f`)
+        emit(`@${p}_f`)
+        continue
+      }
+      if (op == "wary_false") {
+        p = fresh()
+        emit(`  %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
+        emit(`  jnz %${p}, @${p}_t, @${sanitize(a2)}`)
+        emit(`@${p}_t`)
+        continue
+      }
+
+      // --- Function calls ---
+
+      if (op == "frame") {
+        emit(`  ${s(a1)} =l call $cell_rt_frame(l %ctx, l ${s(a2)}, l ${text(a3)})`)
+        continue
+      }
+      if (op == "setarg") {
+        emit(`  call $cell_rt_setarg(l ${s(a1)}, l ${text(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "invoke") {
+        emit(`  ${s(a2)} =l call $cell_rt_invoke(l %ctx, l ${s(a1)})`)
+        continue
+      }
+      if (op == "goframe") {
+        emit(`  ${s(a1)} =l call $cell_rt_goframe(l %ctx, l ${s(a2)}, l ${text(a3)})`)
+        continue
+      }
+      if (op == "goinvoke") {
+        emit(`  call $cell_rt_goinvoke(l %ctx, l ${s(a1)})`)
+        continue
+      }
+
+      // --- Function object creation ---
+
+      if (op == "function") {
+        emit(`  ${s(a1)} =l call $cell_rt_make_function(l %ctx, l ${text(a2)})`)
+        continue
+      }
+
+      // --- Array push/pop ---
+
+      if (op == "push") {
+        emit(`  call $cell_rt_push(l %ctx, l ${s(a1)}, l ${s(a2)})`)
+        continue
+      }
+      if (op == "pop") {
+        emit(`  ${s(a1)} =l call $cell_rt_pop(l %ctx, l ${s(a2)})`)
+        continue
+      }
+
+      // --- Misc ---
+
+      if (op == "return") {
+        emit(`  ret ${s(a1)}`)
+        continue
+      }
+      if (op == "disrupt") {
+        emit(`  call $cell_rt_disrupt(l %ctx)`)
+        emit(`  ret ${text(qbe.js_null)}`)
+        continue
+      }
+      if (op == "delete") {
+        emit(`  ${s(a1)} =l call $cell_rt_delete(l %ctx, l ${s(a2)}, l ${s(a3)})`)
+        continue
+      }
+      if (op == "typeof") {
+        emit(`  ${s(a1)} =l call $cell_rt_typeof(l %ctx, l ${s(a2)})`)
+        continue
+      }
+
+      // --- Unknown opcode ---
+      emit(`  # unknown: ${op}`)
+    }
+
+    emit("}")
+    emit("")
+  }
+
+  // ============================================================
+  // Main: compile all functions then main
+  // ============================================================
+
+  // Compile every entry of ir.functions in order, then the main body.
+  var fi = 0
+  while (fi < length(ir.functions)) {
+    compile_fn(ir.functions[fi], fi, false)
+    fi = fi + 1
+  }
+  compile_fn(ir.main, -1, true)
+
+  // Final layout: data definitions, a blank separator line (only when
+  // there is at least one data definition), then the function bodies.
+  var result = []
+  var append_all = function(lines) {
+    var k = 0
+    while (k < length(lines)) {
+      push(result, lines[k])
+      k = k + 1
+    }
+  }
+
+  append_all(data_out)
+  if (length(data_out) > 0) push(result, "")
+  append_all(out)
+
+  return text(result, "\n")
+}
+
+return qbe_emit
--- a/qbe_emit.mach
+++ b/qbe_emit.mach
--- a/qbe_rt.c
+++ b/qbe_rt.c
@@ -0,0 +1,154 @@
+/*
+ * qbe_rt.c - Runtime support for QBE-compiled ƿit modules
+ *
+ * Provides non-inline versions of static-inline quickjs functions
+ * (which QBE-generated code calls as external symbols) and stub
+ * implementations of cell_rt_* helper functions.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+typedef uint64_t JSValue;
+typedef struct JSContext JSContext;
+
+#define JS_TAG_SHORT_FLOAT 5
+#define JS_TAG_NULL        7
+#define JS_VAL_NULL        7
+
+/* ============================================================
+   Non-inline wrappers for static-inline quickjs functions
+   ============================================================ */
+
+/*
+ * __JS_NewFloat64 — encode double as tagged JSValue
+ * Short float: [sign:1][exp:8][mantissa:52][tag:3]
+ * Returns tagged int if value is an exact integer in int32 range
+ * (zero is the exception: the zero check runs first and yields a
+ * short-float zero). NaN, infinities and values whose rebiased
+ * exponent falls outside 1..254 yield JS_VAL_NULL. ctx is unused.
+ */
+JSValue __JS_NewFloat64(JSContext *ctx, double d) {
+  /* Reinterpret the double as its raw 64-bit pattern. */
+  union { double d; uint64_t u; } u;
+  u.d = d;
+
+  uint64_t sign = u.u >> 63;
+  int exp = (u.u >> 52) & 0x7FF;
+  uint64_t mantissa = u.u & ((1ULL << 52) - 1);
+
+  /* Zero of either sign → short-float zero with the sign bit clear */
+  if (exp == 0 && mantissa == 0)
+    return JS_TAG_SHORT_FLOAT;
+  /* NaN/Inf → null */
+  if (exp == 0x7FF)
+    return JS_VAL_NULL;
+  /* Subnormals → zero, keeping the sign bit */
+  if (exp == 0)
+    return (sign << 63) | JS_TAG_SHORT_FLOAT;
+
+  /* Rebias the 11-bit exponent (bias 1023) to 8 bits (bias 127);
+     anything that does not land in 1..254 is not representable. */
+  int short_exp = exp - 1023 + 127;
+  if (short_exp < 1 || short_exp > 254)
+    return JS_VAL_NULL;
+
+  /* Prefer integer if exact: the range test makes the int32 cast safe,
+     and the round trip back to double rejects fractional values.
+     NOTE(review): negative ints are zero-extended from 32 bits before
+     the shift, while qbe_emit.cm emits integer constants as value*2 —
+     confirm the two encodings agree for negative values. */
+  if (d >= (double)(-2147483647 - 1) && d <= (double)2147483647) {
+    int32_t i = (int32_t)d;
+    if ((double)i == d)
+      return (uint64_t)(uint32_t)i << 1;
+  }
+
+  /* Pack sign (bit 63), exponent (bits 55..62), mantissa (bits 3..54), tag. */
+  return (sign << 63)
+       | ((uint64_t)short_exp << 55)
+       | (mantissa << 3)
+       | JS_TAG_SHORT_FLOAT;
+}
+
+/*
+ * JS_IsNumber — 1 when v is a tagged int (low bit clear) or a short
+ * float (low three bits equal JS_TAG_SHORT_FLOAT), otherwise 0
+ */
+int JS_IsNumber(JSValue v) {
+  if ((v & 1) == 0)
+    return 1;
+  return (v & 7) == JS_TAG_SHORT_FLOAT;
+}
+
+/*
+ * JS_NewString — build a string value from a NUL-terminated C string
+ * by measuring it with strlen and forwarding to JS_NewStringLen
+ */
+extern JSValue JS_NewStringLen(JSContext *ctx, const char *str, size_t len);
+
+JSValue JS_NewString(JSContext *ctx, const char *str) {
+  size_t len = strlen(str);
+  return JS_NewStringLen(ctx, str, len);
+}
+
+/* ============================================================
+   cell_rt_* stubs — error/fallback paths for QBE-compiled code
+   These are called from type-mismatch branches that should not
+   be reached in pure numeric code.
+   ============================================================ */
+
+extern JSValue JS_ThrowTypeError(JSContext *ctx, const char *fmt, ...);
+
+/* Raise a TypeError with a fixed message on ctx. The value returned by
+   JS_ThrowTypeError is discarded; the generated code follows this call
+   with its own `ret` of the null value. */
+void cell_rt_disrupt(JSContext *ctx) {
+  JS_ThrowTypeError(ctx, "type error in native code");
+}
+
+/* Stubs for ordered text comparison and tolerance (in)equality.
+   Each one ignores its arguments and returns JS_VAL_NULL. */
+JSValue cell_rt_lt_text(JSContext *ctx, JSValue a, JSValue b) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_gt_text(JSContext *ctx, JSValue a, JSValue b) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_le_text(JSContext *ctx, JSValue a, JSValue b) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b) {
+  return JS_VAL_NULL;
+}
+
+/* Remaining cell_rt_* stubs. Every value-returning stub ignores its
+   arguments and returns JS_VAL_NULL; every void stub does nothing. */
+
+/* Intrinsic lookup and property/index loads */
+JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_load_index(JSContext *ctx, JSValue arr, JSValue idx) {
+  return JS_VAL_NULL;
+}
+/* Property/index stores */
+void cell_rt_store_field(JSContext *ctx, JSValue val, JSValue obj,
+                         const char *name) {}
+void cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj,
+                           JSValue key) {}
+void cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr,
+                         JSValue idx) {}
+/* Closure variable access */
+JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth,
+                            int64_t index) {
+  return JS_VAL_NULL;
+}
+void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth,
+                         int64_t index) {}
+/* Call frames and invocation (cell_rt_setarg is the only stub here
+   that takes no ctx) */
+JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) {
+  return JS_VAL_NULL;
+}
+void cell_rt_setarg(JSValue frame, int64_t idx, JSValue val) {}
+JSValue cell_rt_invoke(JSContext *ctx, JSValue frame) { return JS_VAL_NULL; }
+JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int64_t nargs) {
+  return JS_VAL_NULL;
+}
+void cell_rt_goinvoke(JSContext *ctx, JSValue frame) {}
+/* Function object creation */
+JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx) {
+  return JS_VAL_NULL;
+}
+/* Array push/pop, delete, typeof */
+void cell_rt_push(JSContext *ctx, JSValue arr, JSValue val) {}
+JSValue cell_rt_pop(JSContext *ctx, JSValue arr) { return JS_VAL_NULL; }
+JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key) {
+  return JS_VAL_NULL;
+}
+JSValue cell_rt_typeof(JSContext *ctx, JSValue val) { return JS_VAL_NULL; }
--- a/regen.cm
+++ b/regen.cm
@@ -12,6 +12,9 @@ var files = [
  {src: "parse.cm", name: "parse", out: "parse.mach"},
  {src: "fold.cm", name: "fold", out: "fold.mach"},
  {src: "mcode.cm", name: "mcode", out: "mcode.mach"},
+  {src: "streamline.cm", name: "streamline", out: "streamline.mach"},
+  {src: "qbe.cm", name: "qbe", out: "qbe.mach"},
+  {src: "qbe_emit.cm", name: "qbe_emit", out: "qbe_emit.mach"},
  {src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"},
  {src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"}
 ]
--- a/run_native.ce
+++ b/run_native.ce
@@ -0,0 +1,69 @@
+// run_native.ce — load a module both interpreted and native, compare speed
+//
+// Usage:
+//   cell --core . run_native.ce <module>
+//
+// Loads <module>.cm via use() (interpreted) and <module>.dylib (native),
+// runs both and compares results and timing.
+
+var os = use('os')
+
+// Require the module argument; print usage and stop otherwise.
+if (length(args) < 1) {
+  print('usage: cell --core . run_native.ce <module>')
+  print('  e.g. cell --core . run_native.ce num_torture')
+  return
+}
+
+// Accept the module with or without its '.cm' extension.
+var name = args[0]
+if (ends_with(name, '.cm')) {
+  name = text(name, 0, length(name) - 3)
+}
+
+// Entry symbol and library path follow the naming used by compile.ce:
+// 'js_' + name with '/' and '-' replaced by '_' + '_use', and
+// <name>.dylib relative to the current directory.
+var safe = replace(replace(name, '/', '_'), '-', '_')
+var symbol = 'js_' + safe + '_use'
+var dylib_path = './' + name + '.dylib'
+var fd = use('fd')
+
+// --- Interpreted run ---
+print('--- interpreted ---')
+var t1 = os.now()
+var result_interp = use(name)
+var t2 = os.now()
+// assumes os.now() returns nanoseconds, so /1000000 yields ms — TODO confirm
+var ms_interp = (t2 - t1) / 1000000
+print('result: ' + text(result_interp))
+print('time:   ' + text(ms_interp) + ' ms')
+
+// --- Native run ---
+// The interpreted numbers above are still printed when the dylib is missing.
+if (!fd.is_file(dylib_path)) {
+  print('\nno ' + dylib_path + ' found — run compile.ce first')
+  return
+}
+
+print('\n--- native ---')
+var t3 = os.now()
+var lib = os.dylib_open(dylib_path)
+var t4 = os.now()
+// NOTE(review): the value of os.dylib_symbol is treated as the module's
+// result, so it presumably looks up AND invokes the symbol — confirm.
+// The result of dylib_open is not checked before use.
+var result_native = os.dylib_symbol(lib, symbol)
+var t5 = os.now()
+var ms_load = (t4 - t3) / 1000000
+var ms_exec = (t5 - t4) / 1000000
+var ms_native = (t5 - t3) / 1000000
+print('result: ' + text(result_native))
+print('load:   ' + text(ms_load) + ' ms')
+print('exec:   ' + text(ms_exec) + ' ms')
+print('total:  ' + text(ms_native) + ' ms')
+
+// --- Comparison ---
+print('\n--- comparison ---')
+var match = result_interp == result_native
+var speedup = 0
+var speedup_exec = 0
+print('match:  ' + text(match))
+// Skip a ratio when its denominator is zero.
+if (ms_native > 0) {
+  speedup = ms_interp / ms_native
+  print('speedup: ' + text(speedup) + 'x (total)')
+}
+if (ms_exec > 0) {
+  speedup_exec = ms_interp / ms_exec
+  print('speedup: ' + text(speedup_exec) + 'x (exec only)')
+}
--- a/source/cell.c
+++ b/source/cell.c
@@ -323,6 +323,7 @@ int cell_init(int argc, char **argv)

  /* Default: run script through bootstrap pipeline */
  int use_mcode = 0;
+  int emit_qbe = 0;
  int arg_start = 1;
  const char *shop_override = NULL;
  const char *core_override = NULL;
@@ -332,6 +333,10 @@ int cell_init(int argc, char **argv)
    if (strcmp(argv[arg_start], "--mcode") == 0) {
      use_mcode = 1;
      arg_start++;
+    } else if (strcmp(argv[arg_start], "--emit-qbe") == 0) {
+      use_mcode = 1;  // QBE requires mcode pipeline
+      emit_qbe = 1;
+      arg_start++;
    } else if (strcmp(argv[arg_start], "--shop") == 0) {
      if (arg_start + 1 >= argc) {
        printf("ERROR: --shop requires a path argument\n");
@@ -416,6 +421,7 @@ int cell_init(int argc, char **argv)
  JS_SetPropertyStr(ctx, hidden_env, "shop_path",
    shop_path ? JS_NewString(ctx, shop_path) : JS_NULL);
  JS_SetPropertyStr(ctx, hidden_env, "use_mcode", JS_NewBool(ctx, use_mcode));
+  JS_SetPropertyStr(ctx, hidden_env, "emit_qbe", JS_NewBool(ctx, emit_qbe));
  JS_SetPropertyStr(ctx, hidden_env, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val));
  JS_SetPropertyStr(ctx, hidden_env, "json", js_json_use(ctx));
  JS_SetPropertyStr(ctx, hidden_env, "nota", js_nota_use(ctx));
--- a/streamline.ce
+++ b/streamline.ce
@@ -0,0 +1,15 @@
+// streamline.ce — run a source file through tokenize, parse, fold,
+// mcode and streamline, then print the optimized mcode IR as JSON
+//
+// Usage:
+//   cell --core . streamline.ce <file>
+
+var fd = use("fd")
+var json = use("json")
+var tokenize = use("tokenize")
+var parse = use("parse")
+var fold = use("fold")
+var mcode = use("mcode")
+var streamline = use("streamline")
+
+// Stop with a usage message instead of passing a missing filename on to
+// fd.slurp (same guard as compile.ce and run_native.ce).
+if (length(args) < 1) {
+  print("usage: cell --core . streamline.ce <file>")
+  return
+}
+
+var filename = args[0]
+var src = text(fd.slurp(filename))
+var result = tokenize(src, filename)
+var ast = parse(result.tokens, src, filename, tokenize)
+var folded = fold(ast)
+var compiled = mcode(folded)
+var optimized = streamline(compiled)
+print(json.encode(optimized))
--- a/streamline.cm
+++ b/streamline.cm
@@ -0,0 +1,351 @@
+// streamline.cm — mcode IR optimizer
+// Per function: a forward type-inference + peephole pass, then dead-jump removal
+
+var streamline = function(ir) {
+  // Type tags tracked per slot. T_NUM stands for "int or float": slot_is
+  // accepts a T_INT or T_FLOAT slot wherever T_NUM is asked for.
+  var T_UNKNOWN = "unknown"
+  var T_NULL = "null"
+  var T_BOOL = "bool"
+  var T_TEXT = "text"
+  var T_NUM = "num"
+  var T_INT = "int"
+  var T_FLOAT = "float"
+
+  // Opcodes whose destination slot is tracked as T_INT
+  var int_result_ops = {
+    add_int: true, sub_int: true,
+    mul_int: true, div_int: true, mod_int: true
+  }
+
+  // Opcodes whose destination slot is tracked as T_FLOAT
+  var float_result_ops = {
+    add_float: true, sub_float: true,
+    mul_float: true, div_float: true, mod_float: true
+  }
+
+  // Opcodes whose destination slot is tracked as T_BOOL (grouped by operator)
+  var bool_result_ops = {
+    eq_int: true, eq_float: true, eq_text: true, eq_bool: true, eq_tol: true,
+    ne_int: true, ne_float: true, ne_text: true, ne_bool: true, ne_tol: true,
+    lt_int: true, lt_float: true, lt_text: true,
+    gt_int: true, gt_float: true, gt_text: true,
+    le_int: true, le_float: true, le_text: true,
+    ge_int: true, ge_float: true, ge_text: true,
+    not: true, and: true, or: true,
+    is_int: true, is_num: true, is_text: true,
+    is_bool: true, is_null: true,
+    is_identical: true
+  }
+
+  // Type check opcode -> the type tag it tests its operand for
+  var type_check_map = {
+    is_null: T_NULL,
+    is_bool: T_BOOL,
+    is_text: T_TEXT,
+    is_num: T_NUM,
+    is_int: T_INT
+  }
+
+  // Type tag for an access literal: T_TEXT, T_INT or T_FLOAT, else T_UNKNOWN
+  var access_value_type = function(val) {
+    if (is_text(val)) {
+      return T_TEXT
+    }
+    if (!is_number(val)) {
+      return T_UNKNOWN
+    }
+    if (is_integer(val)) {
+      return T_INT
+    }
+    return T_FLOAT
+  }
+
+  // Record in slot_types the type an instruction leaves in its destination
+  // slot. Opcodes not listed here leave slot_types untouched. Returns null.
+  var track_types = function(slot_types, instr) {
+    var op = instr[0]
+    var dest_at = 1
+    var result = null
+    var moved = null
+
+    if (op == "access") {
+      result = access_value_type(instr[2])
+    } else if (op == "move") {
+      // A move copies the source slot's type; an untracked source is unknown
+      moved = slot_types[text(instr[2])]
+      result = T_UNKNOWN
+      if (moved != null) {
+        result = moved
+      }
+    } else if (op == "invoke") {
+      // invoke is the only opcode handled here whose destination is instr[2]
+      dest_at = 2
+      result = T_UNKNOWN
+    } else if (op == "int" || op == "neg_int" || int_result_ops[op] == true) {
+      result = T_INT
+    } else if (op == "bitnot" || op == "bitand" || op == "bitor" ||
+               op == "bitxor" || op == "shl" || op == "shr" || op == "ushr") {
+      result = T_INT
+    } else if (op == "neg_float" || float_result_ops[op] == true) {
+      result = T_FLOAT
+    } else if (op == "concat" || op == "typeof") {
+      result = T_TEXT
+    } else if (op == "true" || op == "false" || bool_result_ops[op] == true) {
+      result = T_BOOL
+    } else if (op == "null") {
+      result = T_NULL
+    } else if (op == "load_field" || op == "load_index" || op == "load_dynamic") {
+      result = T_UNKNOWN
+    } else if (op == "pop" || op == "get" || op == "function") {
+      result = T_UNKNOWN
+    }
+
+    // result stays null for opcodes that are not tracked
+    if (result != null) {
+      slot_types[text(instr[dest_at])] = result
+    }
+    return null
+  }
+
+  // True when the tracked type of slot satisfies typ. An untracked slot never
+  // does; a slot tracked as T_INT or T_FLOAT also satisfies T_NUM.
+  var slot_is = function(slot_types, slot, typ) {
+    var tracked = slot_types[text(slot)]
+    var numeric = false
+    if (tracked == null) {
+      return false
+    }
+    numeric = (tracked == T_INT || tracked == T_FLOAT)
+    if (tracked == typ || (typ == T_NUM && numeric)) {
+      return true
+    }
+    return false
+  }
+
+  // Optimize one function's instruction list in place. Always returns null.
+  //
+  // func.instructions mixes text entries (labels) with array entries
+  // (instructions, opcode at index 0). Two passes run over it:
+  //   1. a forward peephole pass that tracks slot types, folds type checks
+  //      whose outcome is known and specializes load_dynamic/store_dynamic;
+  //   2. a pass that drops jumps whose target is reached by falling through.
+  // A removed instruction is overwritten with a unique "_nop_<n>" text entry,
+  // so the list keeps its length and no index shifts.
+  //
+  // NOTE(review): folding a type check also removes the write to its result
+  // slot; this assumes that slot is only read by the jump that follows.
+  // Confirm against the code that emits these instructions.
+  var optimize_function = function(func) {
+    var instructions = func.instructions
+    var num_instr = 0
+    var slot_types = null
+    var nop_counter = 0
+    var i = 0
+    var instr = null
+    var op = null
+    var dest = 0
+    var src = 0
+    var checked_type = null
+    var next = null
+    var next_op = null
+    var outcome = null
+    var always_jumps = false
+    var target_label = null
+    var j = 0
+    var peek = null
+
+    // Overwrite list[index] with the next "_nop_<n>" marker and return the
+    // updated counter, so markers stay unique within this function.
+    var put_nop = function(list, index, counter) {
+      list[index] = "_nop_" + text(counter + 1)
+      return counter + 1
+    }
+
+    // Build the unconditional jump that replaces a conditional jump: same
+    // target label (index 2) and the same two trailing entries.
+    var as_jump = function(cond_jump) {
+      var n = length(cond_jump)
+      return ["jump", cond_jump[2], cond_jump[n - 2], cond_jump[n - 1]]
+    }
+
+    // Decide a type check of slot against checked from the tracked types.
+    // Returns "yes" (always true), "no" (always false) or "maybe" (cannot
+    // be decided from what is tracked).
+    var check_outcome = function(types, slot, checked) {
+      var known = types[text(slot)]
+      if (known == null || known == T_UNKNOWN) {
+        return "maybe"
+      }
+      if (slot_is(types, slot, checked)) {
+        return "yes"
+      }
+      // A slot only known to be T_NUM may hold an int or a float, so a
+      // narrower numeric check on it is not a mismatch: it is undecided.
+      if (known == T_NUM && (checked == T_INT || checked == T_FLOAT)) {
+        return "maybe"
+      }
+      return "no"
+    }
+
+    if (instructions == null || length(instructions) == 0) {
+      return null
+    }
+
+    num_instr = length(instructions)
+    slot_types = {}
+
+    // Pass 1: walk forward once, tracking slot types and rewriting
+    // instructions whose behavior those types already determine.
+    i = 0
+    while (i < num_instr) {
+      instr = instructions[i]
+
+      // Labels are join points: clear all type info (conservative)
+      if (is_text(instr)) {
+        slot_types = {}
+        i = i + 1
+        continue
+      }
+
+      // Entries that are neither labels nor instructions are skipped
+      if (!is_array(instr)) {
+        i = i + 1
+        continue
+      }
+
+      op = instr[0]
+
+      // --- Peephole: type check, possibly followed by a jump on its result ---
+      if (type_check_map[op] != null && i + 1 < num_instr) {
+        dest = instr[1]
+        src = instr[2]
+        checked_type = type_check_map[op]
+        next = instructions[i + 1]
+
+        if (is_array(next)) {
+          next_op = next[0]
+
+          // Pattern: is_<type> t, x  ->  jump_false/jump_true t, label
+          if ((next_op == "jump_false" || next_op == "jump_true") && next[1] == dest) {
+            outcome = check_outcome(slot_types, src, checked_type)
+
+            if (outcome == "maybe") {
+              // Cannot fold. The check writes a bool to dest; when a
+              // jump_false falls through the check passed, so src narrows.
+              slot_types[text(dest)] = T_BOOL
+              if (next_op == "jump_false") {
+                slot_types[text(src)] = checked_type
+              }
+              i = i + 2
+              continue
+            }
+
+            // Outcome known. jump_true fires when the check is true and
+            // jump_false when it is false; in the other two cases the
+            // jump can never fire and is removed together with the check.
+            always_jumps = false
+            if (next_op == "jump_true" && outcome == "yes") {
+              always_jumps = true
+            }
+            if (next_op == "jump_false" && outcome == "no") {
+              always_jumps = true
+            }
+
+            nop_counter = put_nop(instructions, i, nop_counter)
+            if (always_jumps) {
+              instructions[i + 1] = as_jump(next)
+            } else {
+              nop_counter = put_nop(instructions, i + 1, nop_counter)
+            }
+            // dest is no longer written, so nothing is known about it
+            slot_types[text(dest)] = T_UNKNOWN
+            i = i + 2
+            continue
+          }
+        }
+
+        // Standalone type check (no jump on its result): just track the result
+        slot_types[text(dest)] = T_BOOL
+        i = i + 1
+        continue
+      }
+
+      // --- Strength reduction: load_dynamic / store_dynamic ---
+      // The tracked type of slot instr[3] picks the form: text selects
+      // *_field, int selects *_index, anything else keeps *_dynamic.
+      if (op == "load_dynamic") {
+        if (slot_is(slot_types, instr[3], T_TEXT)) {
+          instr[0] = "load_field"
+        } else if (slot_is(slot_types, instr[3], T_INT)) {
+          instr[0] = "load_index"
+        }
+        slot_types[text(instr[1])] = T_UNKNOWN
+        i = i + 1
+        continue
+      }
+      if (op == "store_dynamic") {
+        if (slot_is(slot_types, instr[3], T_TEXT)) {
+          instr[0] = "store_field"
+        } else if (slot_is(slot_types, instr[3], T_INT)) {
+          instr[0] = "store_index"
+        }
+        i = i + 1
+        continue
+      }
+
+      // --- Standard type tracking ---
+      track_types(slot_types, instr)
+
+      i = i + 1
+    }
+
+    // Pass 2: remove dead jumps. A jump is dead when only text entries
+    // (labels and nop markers) lie between it and its target label, because
+    // falling through them reaches the same place.
+    i = 0
+    while (i < num_instr) {
+      instr = instructions[i]
+      if (is_array(instr) && instr[0] == "jump") {
+        target_label = instr[1]
+        j = i + 1
+        while (j < num_instr) {
+          peek = instructions[j]
+          // A real instruction comes before the target: the jump is needed
+          if (is_array(peek)) {
+            break
+          }
+          if (is_text(peek)) {
+            if (peek == target_label) {
+              nop_counter = put_nop(instructions, i, nop_counter)
+              break
+            }
+          }
+          j = j + 1
+        }
+      }
+      i = i + 1
+    }
+
+    return null
+  }
+
+  // Optimize the main function, when the IR has one
+  var fi = 0
+  var subs = ir.functions
+  if (ir.main != null) {
+    optimize_function(ir.main)
+  }
+
+  // Optimize every sub-function; each call starts with empty type info
+  if (subs != null) {
+    while (fi < length(subs)) {
+      optimize_function(subs[fi])
+      fi = fi + 1
+    }
+  }
+
+  return ir
+}
+
+return streamline
--- a/streamline.mach
+++ b/streamline.mach
--- a/tokenize.ce
+++ b/tokenize.ce
@@ -1,4 +1,5 @@
 var fd = use("fd")
+var json = use("json")
 var tokenize = use("tokenize")
 var filename = args[0]
 var src = text(fd.slurp(filename))