From 7b46c6e9479c03c019bd315da68e09d3b20c307d Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Thu, 12 Feb 2026 16:34:45 -0600 Subject: [PATCH] update docs --- docs/spec/.pages | 3 + docs/spec/mach.md | 93 +------- docs/spec/mcode.md | 364 +++++++++++++++++++++++++------- docs/spec/pipeline.md | 55 ++--- website/data/spec_sections.yaml | 18 +- 5 files changed, 324 insertions(+), 209 deletions(-) create mode 100644 docs/spec/.pages diff --git a/docs/spec/.pages b/docs/spec/.pages new file mode 100644 index 00000000..fb0b88a5 --- /dev/null +++ b/docs/spec/.pages @@ -0,0 +1,3 @@ +nav: + - pipeline.md + - mcode.md diff --git a/docs/spec/mach.md b/docs/spec/mach.md index 0419d209..54ae4ed1 100644 --- a/docs/spec/mach.md +++ b/docs/spec/mach.md @@ -1,11 +1,13 @@ --- title: "Register VM" -description: "Register-based virtual machine (Mach)" +description: "Binary encoding of the Mach bytecode interpreter" --- ## Overview -The Mach VM is a register-based virtual machine using 32-bit instructions. It is modeled after Lua's register VM — operands are register indices rather than stack positions, reducing instruction count and improving performance. +The Mach VM is a register-based virtual machine that directly interprets the [Mcode IR](mcode.md) instruction set as compact 32-bit binary bytecode. It is modeled after Lua's register VM — operands are register indices rather than stack positions, reducing instruction count and improving performance. + +The Mach serializer (`mach.c`) converts streamlined mcode JSON into binary instructions. Since the Mach bytecode is a direct encoding of the mcode, the [Mcode IR](mcode.md) reference is the authoritative instruction set documentation. ## Instruction Formats @@ -45,95 +47,12 @@ Used for unconditional jumps with a 24-bit signed offset. ## Registers -Each function frame has a fixed number of register slots, determined at compile time. 
Registers hold: +Each function frame has a fixed number of register slots, determined at compile time: - **R(0)** — `this` binding - **R(1)..R(arity)** — function arguments - **R(arity+1)..** — local variables and temporaries -## Instruction Set - -### Loading - -| Opcode | Format | Description | -|--------|--------|-------------| -| `LOADK` | iABx | `R(A) = K(Bx)` — load from constant pool | -| `LOADI` | iAsBx | `R(A) = sBx` — load small integer | -| `LOADNULL` | iA | `R(A) = null` | -| `LOADTRUE` | iA | `R(A) = true` | -| `LOADFALSE` | iA | `R(A) = false` | -| `MOVE` | iABC | `R(A) = R(B)` — register copy | - -### Arithmetic - -| Opcode | Format | Description | -|--------|--------|-------------| -| `ADD` | iABC | `R(A) = R(B) + R(C)` | -| `SUB` | iABC | `R(A) = R(B) - R(C)` | -| `MUL` | iABC | `R(A) = R(B) * R(C)` | -| `DIV` | iABC | `R(A) = R(B) / R(C)` | -| `MOD` | iABC | `R(A) = R(B) % R(C)` | -| `POW` | iABC | `R(A) = R(B) ^ R(C)` | -| `NEG` | iABC | `R(A) = -R(B)` | -| `INC` | iABC | `R(A) = R(B) + 1` | -| `DEC` | iABC | `R(A) = R(B) - 1` | - -### Comparison - -| Opcode | Format | Description | -|--------|--------|-------------| -| `EQ` | iABC | `R(A) = R(B) == R(C)` | -| `NEQ` | iABC | `R(A) = R(B) != R(C)` | -| `LT` | iABC | `R(A) = R(B) < R(C)` | -| `LE` | iABC | `R(A) = R(B) <= R(C)` | -| `GT` | iABC | `R(A) = R(B) > R(C)` | -| `GE` | iABC | `R(A) = R(B) >= R(C)` | - -### Property Access - -| Opcode | Format | Description | -|--------|--------|-------------| -| `GETFIELD` | iABC | `R(A) = R(B)[K(C)]` — named property | -| `SETFIELD` | iABC | `R(A)[K(B)] = R(C)` — set named property | -| `GETINDEX` | iABC | `R(A) = R(B)[R(C)]` — computed property | -| `SETINDEX` | iABC | `R(A)[R(B)] = R(C)` — set computed property | - -### Variable Resolution - -| Opcode | Format | Description | -|--------|--------|-------------| -| `GETNAME` | iABx | Unresolved variable (compiler placeholder) | -| `GETINTRINSIC` | iABx | Global intrinsic / built-in | -| `GETENV` | iABx | 
Module environment variable | -| `GETUP` | iABC | `R(A) = UpFrame(B).slots[C]` — closure upvalue | -| `SETUP` | iABC | `UpFrame(A).slots[B] = R(C)` — set closure upvalue | - -### Control Flow - -| Opcode | Format | Description | -|--------|--------|-------------| -| `JMP` | isJ | Unconditional jump | -| `JMPTRUE` | iAsBx | Jump if `R(A)` is true | -| `JMPFALSE` | iAsBx | Jump if `R(A)` is false | -| `JMPNULL` | iAsBx | Jump if `R(A)` is null | - -### Function Calls - -| Opcode | Format | Description | -|--------|--------|-------------| -| `CALL` | iABC | Call `R(A)` with `B` args starting at `R(A+1)`, `C`=keep result | -| `RETURN` | iA | Return `R(A)` | -| `RETNIL` | — | Return null | -| `CLOSURE` | iABx | Create closure from function pool entry `Bx` | - -### Object / Array - -| Opcode | Format | Description | -|--------|--------|-------------| -| `NEWOBJECT` | iA | `R(A) = {}` | -| `NEWARRAY` | iABC | `R(A) = array(B)` | -| `PUSH` | iABC | Push `R(B)` to array `R(A)` | - ## JSCodeRegister The compiled output for a function: @@ -149,7 +68,7 @@ struct JSCodeRegister { uint32_t func_count; // nested function count JSCodeRegister **functions; // nested function table JSValue name; // function name - uint16_t disruption_pc; // exception handler offset + uint16_t disruption_pc; // disruption handler offset }; ``` diff --git a/docs/spec/mcode.md b/docs/spec/mcode.md index cccd5485..25ccf644 100644 --- a/docs/spec/mcode.md +++ b/docs/spec/mcode.md @@ -1,23 +1,260 @@ --- title: "Mcode IR" -description: "JSON-based intermediate representation" +description: "Instruction set reference for the JSON-based intermediate representation" --- ## Overview -Mcode is a JSON-based intermediate representation that can be interpreted directly. It represents the same operations as the Mach register VM but uses string-based instruction dispatch rather than binary opcodes. Mcode is intended as an intermediate step toward native code compilation. 
- -## Pipeline +Mcode is the intermediate representation at the center of the ƿit compilation pipeline. All source code is lowered to mcode before execution or native compilation. The mcode instruction set is the **authoritative reference** for the operations supported by the ƿit runtime — the Mach VM bytecode is a direct binary encoding of these same instructions. ``` -Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Mach VM (default) - → Mcode Interpreter - → QBE → Native +Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine ``` -Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result is serialized to binary bytecode by the Mach compiler (`mach.c`), interpreted directly by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview. +Mcode is produced by `mcode.cm`, optimized by `streamline.cm`, then either serialized to 32-bit bytecode for the Mach VM (`mach.c`), or lowered to QBE/LLVM IL for native compilation (`qbe_emit.cm`). See [Compilation Pipeline](pipeline.md) for the full overview. -### Function Proxy Decomposition +## Instruction Format + +Each instruction is a JSON array. The first element is the instruction name (string), followed by operands. The last two elements are line and column numbers for source mapping: + +```json +["add_int", dest, a, b, line, col] +["load_field", dest, obj, "key", line, col] +["jump", "label_name"] +``` + +Operands are register slot numbers (integers), constant values (strings, numbers), or label names (strings). 
+ +## Instruction Reference + +### Loading and Constants + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `access` | `dest, name` | Load variable by name (intrinsic or environment) | +| `int` | `dest, value` | Load integer constant | +| `true` | `dest` | Load boolean `true` | +| `false` | `dest` | Load boolean `false` | +| `null` | `dest` | Load `null` | +| `move` | `dest, src` | Copy register value | +| `function` | `dest, id` | Load nested function by index | +| `regexp` | `dest, pattern` | Create regexp object | + +### Arithmetic — Integer + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `add_int` | `dest, a, b` | `dest = a + b` (integer) | +| `sub_int` | `dest, a, b` | `dest = a - b` (integer) | +| `mul_int` | `dest, a, b` | `dest = a * b` (integer) | +| `div_int` | `dest, a, b` | `dest = a / b` (integer) | +| `mod_int` | `dest, a, b` | `dest = a % b` (integer) | +| `neg_int` | `dest, src` | `dest = -src` (integer) | + +### Arithmetic — Float + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `add_float` | `dest, a, b` | `dest = a + b` (float) | +| `sub_float` | `dest, a, b` | `dest = a - b` (float) | +| `mul_float` | `dest, a, b` | `dest = a * b` (float) | +| `div_float` | `dest, a, b` | `dest = a / b` (float) | +| `mod_float` | `dest, a, b` | `dest = a % b` (float) | +| `neg_float` | `dest, src` | `dest = -src` (float) | + +### Arithmetic — Generic + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `pow` | `dest, a, b` | `dest = a ^ b` (exponentiation) | + +### Text + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `concat` | `dest, a, b` | `dest = a ~ b` (text concatenation) | + +### Comparison — Integer + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `eq_int` | `dest, a, b` | `dest = a == b` (integer) | +| `ne_int` | `dest, 
a, b` | `dest = a != b` (integer) | +| `lt_int` | `dest, a, b` | `dest = a < b` (integer) | +| `le_int` | `dest, a, b` | `dest = a <= b` (integer) | +| `gt_int` | `dest, a, b` | `dest = a > b` (integer) | +| `ge_int` | `dest, a, b` | `dest = a >= b` (integer) | + +### Comparison — Float + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `eq_float` | `dest, a, b` | `dest = a == b` (float) | +| `ne_float` | `dest, a, b` | `dest = a != b` (float) | +| `lt_float` | `dest, a, b` | `dest = a < b` (float) | +| `le_float` | `dest, a, b` | `dest = a <= b` (float) | +| `gt_float` | `dest, a, b` | `dest = a > b` (float) | +| `ge_float` | `dest, a, b` | `dest = a >= b` (float) | + +### Comparison — Text + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `eq_text` | `dest, a, b` | `dest = a == b` (text) | +| `ne_text` | `dest, a, b` | `dest = a != b` (text) | +| `lt_text` | `dest, a, b` | `dest = a < b` (lexicographic) | +| `le_text` | `dest, a, b` | `dest = a <= b` (lexicographic) | +| `gt_text` | `dest, a, b` | `dest = a > b` (lexicographic) | +| `ge_text` | `dest, a, b` | `dest = a >= b` (lexicographic) | + +### Comparison — Boolean + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `eq_bool` | `dest, a, b` | `dest = a == b` (boolean) | +| `ne_bool` | `dest, a, b` | `dest = a != b` (boolean) | + +### Comparison — Special + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `is_identical` | `dest, a, b` | Object identity check (same reference) | +| `eq_tol` | `dest, a, b` | Equality with tolerance | +| `ne_tol` | `dest, a, b` | Inequality with tolerance | + +### Type Checks + +Inlined from intrinsic function calls. Each sets `dest` to `true` or `false`. 
+ +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `is_int` | `dest, src` | Check if integer | +| `is_num` | `dest, src` | Check if number (integer or float) | +| `is_text` | `dest, src` | Check if text | +| `is_bool` | `dest, src` | Check if logical | +| `is_null` | `dest, src` | Check if null | +| `is_array` | `dest, src` | Check if array | +| `is_func` | `dest, src` | Check if function | +| `is_record` | `dest, src` | Check if record (object) | +| `is_stone` | `dest, src` | Check if stone (immutable) | +| `is_proxy` | `dest, src` | Check if function proxy (arity 2) | + +### Logical + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `not` | `dest, src` | Logical NOT | +| `and` | `dest, a, b` | Logical AND | +| `or` | `dest, a, b` | Logical OR | + +### Bitwise + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `bitand` | `dest, a, b` | Bitwise AND | +| `bitor` | `dest, a, b` | Bitwise OR | +| `bitxor` | `dest, a, b` | Bitwise XOR | +| `bitnot` | `dest, src` | Bitwise NOT | +| `shl` | `dest, a, b` | Shift left | +| `shr` | `dest, a, b` | Arithmetic shift right | +| `ushr` | `dest, a, b` | Unsigned shift right | + +### Property Access + +Memory operations come in typed variants. The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold. 
+ | Instruction | Operands | Description | +|-------------|----------|-------------| +| `load_field` | `dest, obj, key` | Load record property by string key | +| `store_field` | `obj, val, key` | Store record property by string key | +| `load_index` | `dest, obj, idx` | Load array element by integer index | +| `store_index` | `obj, val, idx` | Store array element by integer index | +| `load_dynamic` | `dest, obj, key` | Load property (dispatches at runtime) | +| `store_dynamic` | `obj, val, key` | Store property (dispatches at runtime) | +| `delete` | `obj, key` | Delete property | +| `in` | `dest, obj, key` | Check if property exists | +| `typeof` | `dest, src` | Get type name as text | +| `length` | `dest, src` | Get length of array or text | + +### Object and Array Construction + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `record` | `dest` | Create empty record `{}` | +| `array` | `dest, n, ...elems` | Create array with `n` elements | +| `push` | `arr, val` | Push value to array | +| `pop` | `dest, arr` | Pop value from array | + +### Function Calls + +Function calls are decomposed into three steps — allocate a frame, set its arguments, invoke — with `goframe`/`goinvoke` variants for async/concurrent calls: + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `frame` | `dest, fn, argc` | Allocate call frame for `fn` with `argc` arguments | +| `setarg` | `frame, idx, val` | Set argument `idx` in call frame | +| `invoke` | `frame, result` | Execute the call, store result | +| `goframe` | `dest, fn, argc` | Allocate frame for async/concurrent call | +| `goinvoke` | `frame, result` | Invoke async/concurrent call | + +### Variable Resolution + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `access` | `dest, name` | Load variable (intrinsic or module environment) | +| `set_var` | `name, src` | Set top-level variable by name | +| `get` | `dest, level, slot` | Get closure variable from parent scope | +| `put` | `level, slot, src` | Set closure variable 
in parent scope | + +### Control Flow + +| Instruction | Operands | Description | +|-------------|----------|-------------| +| `LABEL` | `name` | Define a named label (not executed) | +| `jump` | `label` | Unconditional jump | +| `jump_true` | `cond, label` | Jump if `cond` is true | +| `jump_false` | `cond, label` | Jump if `cond` is false | +| `jump_not_null` | `val, label` | Jump if `val` is not null | +| `return` | `src` | Return value from function | +| `disrupt` | — | Trigger disruption (error) | + +## Typed Instruction Design + +A key design principle of mcode is that **every type check is an explicit instruction**. Arithmetic and comparison operations come in type-specialized variants (`add_int`, `add_float`, `eq_text`, etc.) rather than a single polymorphic instruction. + +When type information is available from the fold stage, the compiler emits the typed variant directly. When the type is unknown, the compiler emits a type-check/dispatch pattern: + +```json +["is_int", check, a] +["jump_false", check, "float_path"] +["add_int", dest, a, b] +["jump", "done"] +["LABEL", "float_path"] +["add_float", dest, a, b] +["LABEL", "done"] +``` + +The [Streamline Optimizer](streamline.md) eliminates dead branches when types are statically known, collapsing the dispatch to a single typed instruction. 
+ +## Intrinsic Inlining + +The mcode compiler recognizes calls to built-in intrinsic functions and emits direct opcodes instead of the generic frame/setarg/invoke call sequence: + +| Source call | Emitted instruction | +|-------------|-------------------| +| `is_array(x)` | `is_array dest, src` | +| `is_function(x)` | `is_func dest, src` | +| `is_object(x)` | `is_record dest, src` | +| `is_stone(x)` | `is_stone dest, src` | +| `is_integer(x)` | `is_int dest, src` | +| `is_text(x)` | `is_text dest, src` | +| `is_number(x)` | `is_num dest, src` | +| `is_logical(x)` | `is_bool dest, src` | +| `is_null(x)` | `is_null dest, src` | +| `length(x)` | `length dest, src` | +| `push(arr, val)` | `push arr, val` | + +## Function Proxy Decomposition When the compiler encounters a method call `obj.method(args)`, it emits a branching pattern to handle ƿit's function proxy protocol. An arity-2 function used as a proxy target receives the method name and argument array instead of a normal method call: @@ -25,9 +262,8 @@ When the compiler encounters a method call `obj.method(args)`, it emits a branch ["is_proxy", check, obj] ["jump_false", check, "record_path"] -// Proxy path: call obj(name, [args...]) with this=null ["access", name_slot, "method"] -["array", args_arr, N, arg0, arg1, ...] +["array", args_arr, N, arg0, arg1] ["null", null_slot] ["frame", f, obj, 2] ["setarg", f, 0, null_slot] @@ -41,21 +277,38 @@ When the compiler encounters a method call `obj.method(args)`, it emits a branch ["frame", f2, method, N] ["setarg", f2, 0, obj] ["setarg", f2, 1, arg0] -... ["invoke", f2, dest] ["LABEL", "done"] ``` -The streamline optimizer can eliminate the dead branch when the type of `obj` is statically known. 
+## Labels and Control Flow -## JSMCode Structure +Control flow uses named labels instead of numeric offsets: + +```json +["LABEL", "loop_start"] +["add_int", 1, 1, 2] +["jump_false", 3, "loop_end"] +["jump", "loop_start"] +["LABEL", "loop_end"] +``` + +Labels are collected into a name-to-index map during loading, enabling O(1) jump resolution. The Mach serializer converts label names to numeric offsets in the binary bytecode. + +## Nop Convention + +The streamline optimizer replaces eliminated instructions with nop strings (e.g., `_nop_tc_1`, `_nop_bl_2`). Nop strings are skipped during interpretation and native code emission but preserved in the instruction array to maintain positional stability for jump targets. + +## Internal Structures + +### JSMCode (Mcode Interpreter) ```c struct JSMCode { uint16_t nr_args; // argument count uint16_t nr_slots; // register count - cJSON **instrs; // pre-flattened instruction array + cJSON **instrs; // instruction array uint32_t instr_count; // number of instructions struct { @@ -70,74 +323,25 @@ struct JSMCode { cJSON *json_root; // keeps JSON alive const char *name; // function name const char *filename; // source file - uint16_t disruption_pc; // exception handler offset + uint16_t disruption_pc; // disruption handler offset }; ``` -## Instruction Format +### JSCodeRegister (Mach VM Bytecode) -Each instruction is a JSON array. 
The first element is the instruction name (string), followed by operands (typically `[op, dest, ...args, line, col]`): - -```json -["access", 3, 5, 1, 9] -["load_index", 10, 4, 9, 5, 11] -["store_dynamic", 4, 11, 12, 6, 3] -["frame", 15, 14, 1, 7, 7] -["setarg", 15, 0, 16, 7, 7] -["invoke", 15, 13, 7, 7] +```c +struct JSCodeRegister { + uint16_t arity; // argument count + uint16_t nr_slots; // total register count + uint32_t cpool_count; // constant pool size + JSValue *cpool; // constant pool + uint32_t instr_count; // instruction count + MachInstr32 *instructions; // 32-bit instruction array + uint32_t func_count; // nested function count + JSCodeRegister **functions; // nested function table + JSValue name; // function name + uint16_t disruption_pc; // disruption handler offset +}; ``` -### Typed Load/Store - -Memory operations come in typed variants for optimization: - -- `load_index dest, obj, idx` — array element by integer index -- `load_field dest, obj, key` — record property by string key -- `load_dynamic dest, obj, key` — unknown; dispatches at runtime -- `store_index obj, val, idx` — array element store -- `store_field obj, val, key` — record property store -- `store_dynamic obj, val, key` — unknown; dispatches at runtime - -The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold. - -### Decomposed Calls - -Function calls are split into separate instructions: - -- `frame dest, fn, argc` — allocate call frame -- `setarg frame, idx, val` — set argument -- `invoke frame, result` — execute the call - -## Labels - -Control flow uses named labels instead of numeric offsets: - -```json -["LABEL", "loop_start"] -["ADD", 1, 1, 2] -["JMPFALSE", 3, "loop_end"] -["JMP", "loop_start"] -["LABEL", "loop_end"] -``` - -Labels are collected into a name-to-index map during loading, enabling O(1) jump resolution. 
- -## Differences from Mach - -| Property | Mcode | Mach | -|----------|-------|------| -| Instructions | cJSON arrays | 32-bit binary | -| Dispatch | String comparison | Switch on opcode byte | -| Constants | Inline in JSON | Separate constant pool | -| Jump targets | Named labels | Numeric offsets | -| Memory | Heap (cJSON nodes) | Off-heap (malloc) | - -## Purpose - -Mcode serves as an inspectable, debuggable intermediate format: - -- **Human-readable** — the JSON representation can be printed and examined -- **Language-independent** — any tool that produces the correct JSON can target the ƿit runtime -- **Compilation target** — the Mach compiler can consume mcode as input, and future native code generators can work from the same representation - -The cost of string-based dispatch makes mcode slower than the binary Mach VM, so it is primarily useful during development and as a compilation intermediate rather than for production execution. +The Mach serializer (`mach.c`) converts the JSON mcode into compact 32-bit instructions with a constant pool. See [Register VM](mach.md) for the binary encoding formats. diff --git a/docs/spec/pipeline.md b/docs/spec/pipeline.md index 2a64ee15..17ce0367 100644 --- a/docs/spec/pipeline.md +++ b/docs/spec/pipeline.md @@ -5,14 +5,17 @@ description: "Overview of the compilation stages and optimizations" ## Overview -The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. All backends share the same path through mcode and streamline. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental). +The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. All backends share the same path through mcode and streamline. 
``` -Source → Tokenize → Parse → Fold → Mcode → Streamline → Mach VM (default) - → Mcode Interpreter - → QBE → Native +Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine ``` +The final **machine** stage has two targets: + +- **Mach VM** — a register-based bytecode interpreter that directly executes the mcode instruction set as compact 32-bit binary +- **Native code** — lowers mcode to QBE or LLVM intermediate language, then compiles to machine code for the target CPU architecture + ## Stages ### Tokenize (`tokenize.cm`) @@ -49,7 +52,7 @@ Lowers the AST to a JSON-based intermediate representation with explicit operati - **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup. - **Intrinsic inlining**: Type-check intrinsics (`is_array`, `is_text`, `is_number`, `is_integer`, `is_logical`, `is_null`, `is_function`, `is_object`, `is_stone`), `length`, and `push` are emitted as direct opcodes instead of frame/setarg/invoke call sequences. -See [Mcode IR](mcode.md) for instruction format details. +See [Mcode IR](mcode.md) for the instruction format and complete instruction reference. ### Streamline (`streamline.cm`) @@ -64,46 +67,26 @@ Optimizes the Mcode IR through a series of independent passes. Operates per-func See [Streamline Optimizer](streamline.md) for detailed pass descriptions. -### QBE Emit (`qbe_emit.cm`) +### Machine -Lowers optimized Mcode IR to QBE intermediate language for native code compilation. Each Mcode function becomes a QBE function that calls into the cell runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.). +The streamlined mcode is lowered to a machine target for execution. -String constants are interned in a data section. Integer constants are NaN-boxed inline. +#### Mach VM (default) -### QBE Macros (`qbe.cm`) - -Provides operation implementations as QBE IL templates. 
Each arithmetic, comparison, and type operation is defined as a function that emits the corresponding QBE instructions, handling type dispatch (integer, float, text paths) with proper guard checks. - -## Execution Backends - -### Mach VM (default) - -Binary 32-bit register VM. The Mach serializer (`mach.c`) converts streamlined mcode JSON into compact 32-bit bytecode with a constant pool. Used for production execution and bootstrapping. +The Mach VM is a register-based virtual machine that directly interprets the mcode instruction set as 32-bit binary bytecode. The Mach serializer (`mach.c`) converts streamlined mcode JSON into compact 32-bit instructions with a constant pool. Since the Mach bytecode is a direct encoding of the mcode, the [Mcode IR](mcode.md) reference serves as the authoritative instruction set documentation. ``` -./cell script.ce +pit script.ce ``` -Debug the mach bytecode output: +#### Native Code (QBE / LLVM) + +Lowers the streamlined mcode to QBE or LLVM intermediate language for compilation to native machine code. Each mcode function becomes a native function that calls into the ƿit runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.). + +String constants are interned in a data section. Integer constants are encoded inline. ``` -./cell --core . --dump-mach script.ce -``` - -### Mcode Interpreter - -JSON-based interpreter. Used for debugging the compilation pipeline. - -``` -./cell --mcode script.ce -``` - -### QBE Native (experimental) - -Generates QBE IL that can be compiled to native code. 
- -``` -./cell --emit-qbe script.ce > output.ssa +pit --emit-qbe script.ce > output.ssa ``` ## Files diff --git a/website/data/spec_sections.yaml b/website/data/spec_sections.yaml index ab1af269..f24e83f9 100644 --- a/website/data/spec_sections.yaml +++ b/website/data/spec_sections.yaml @@ -1,4 +1,16 @@ sections: + - title: "Compilation Pipeline" + page: "/docs/spec/pipeline/" + id: "pipeline" + - title: "Mcode IR" + page: "/docs/spec/mcode/" + id: "mcode" + - title: "Streamline Optimizer" + page: "/docs/spec/streamline/" + id: "streamline" + - title: "Register VM" + page: "/docs/spec/mach/" + id: "mach" - title: "DEC64 Numbers" page: "/docs/spec/dec64/" id: "dec64" @@ -14,12 +26,6 @@ sections: - title: "Garbage Collection" page: "/docs/spec/gc/" id: "gc" - - title: "Register VM" - page: "/docs/spec/mach/" - id: "mach" - - title: "Mcode IR" - page: "/docs/spec/mcode/" - id: "mcode" - title: "Kim Encoding" page: "/docs/kim/" id: "kim"