Merge branch 'mach' into mcode2

This commit is contained in:
2026-02-10 19:04:22 -06:00
25 changed files with 2187 additions and 116 deletions

100
compile.ce Normal file
View File

@@ -0,0 +1,100 @@
// compile.ce — compile a .cm module to native .dylib via QBE
//
// Usage:
//   cell --core . compile.ce <file.cm>
//
// Produces <file>.dylib, resolved relative to the current directory (any
// directory part of <file.cm> is kept, so `sub/a.cm` yields `sub/a.dylib`).
//
// Pipeline: emit QBE IL -> post-process with awk -> append the module entry
// wrapper -> qbe -> cc (assemble) -> cc (runtime stubs) -> cc (link).
// Every step runs through os.system; the script stops at the first step
// that reports a non-zero status.
var fd = use('fd')
var os = use('os')

// Wrap a value in single quotes for /bin/sh so paths containing spaces or
// shell metacharacters are passed through as one literal word. Embedded
// single quotes are rewritten as '\'' (close quote, escaped quote, reopen).
// NOTE(review): relies on replace() substituting every occurrence, as the
// symbol sanitising below already assumes — confirm.
var shq = function(s) {
  return "'" + replace(s, "'", "'\\''") + "'"
}

if (length(args) < 1) {
  print('usage: cell --core . compile.ce <file.cm>')
  return
}

var file = args[0]

// Strip the .cm extension to get the module base name.
var base = file
if (ends_with(base, '.cm')) {
  base = text(base, 0, length(base) - 3)
}

// `safe` is used both in the exported symbol name and in the temp file
// names, so path separators and dashes are flattened to underscores.
var safe = replace(replace(base, '/', '_'), '-', '_')
var symbol = 'js_' + safe + '_use'
var tmp = '/tmp/qbe_' + safe
var ssa_path = tmp + '.ssa'
var fixed_path = tmp + '_fixed.ssa'
var s_path = tmp + '.s'
var o_path = tmp + '.o'
var rt_o_path = '/tmp/qbe_rt.o'
var dylib_path = base + '.dylib'
var cwd = fd.getcwd()
var rc = 0

// Step 1: emit QBE IL
print('emit qbe...')
rc = os.system('cd ' + shq(cwd) + ' && ./cell --core . --emit-qbe ' + shq(file) + ' > ' + shq(ssa_path))
if (rc != 0) {
  print('failed to emit qbe il')
  return
}

// Step 2: post-process — insert dead labels after ret/jmp, append wrapper
// Use awk via shell to avoid blob/slurpwrite issues with long strings.
// The awk program remembers when it has just printed a ret/jmp line; if the
// next non-empty line is an instruction (not a label `@...` or a closing
// `}`), it first prints a fresh `@_dead_<n>` label.
print('post-process...')
var awk_cmd = `awk '
/^[[:space:]]*ret / || /^[[:space:]]*jmp / { need_label=1; print; next }
need_label && /^[[:space:]]*[^@}]/ && NF > 0 {
print "@_dead_" dead_id; dead_id++; need_label=0
}
/^@/ || /^}/ || NF==0 { need_label=0 }
{ print }
' ` + shq(ssa_path) + ` > ` + shq(fixed_path)
rc = os.system(awk_cmd)
if (rc != 0) {
  print('post-process failed')
  return
}

// Append wrapper function: exports `js_<safe>_use`, allocates a 4096-byte
// frame and forwards to cell_main. `%%` is printf's escape for a literal `%`.
// NOTE(review): `symbol` is embedded inside the single-quoted printf format,
// so a base name containing `'` or `%` would still break this command.
var wrapper_cmd = `printf '\nexport function l $` + symbol + `(l %%ctx) {\n@entry\n %%frame =l alloc8 4096\n %%result =l call $cell_main(l %%ctx, l %%frame)\n ret %%result\n}\n' >> ` + shq(fixed_path)
rc = os.system(wrapper_cmd)
if (rc != 0) {
  print('wrapper append failed')
  return
}

// Step 3: compile QBE IL to assembly
// The qbe path is left unquoted on purpose so the shell expands `~`.
print('qbe compile...')
rc = os.system('~/.local/bin/qbe -o ' + shq(s_path) + ' ' + shq(fixed_path))
if (rc != 0) {
  print('qbe compilation failed')
  return
}

// Step 4: assemble
print('assemble...')
rc = os.system('cc -c ' + shq(s_path) + ' -o ' + shq(o_path))
if (rc != 0) {
  print('assembly failed')
  return
}

// Step 5: compile runtime stubs (cached — skip if already built)
// The cache is keyed only on the object file existing, so a changed
// qbe_rt.c is not picked up until /tmp/qbe_rt.o is removed.
if (!fd.is_file(rt_o_path)) {
  print('compile runtime stubs...')
  rc = os.system('cc -c ' + shq(cwd + '/qbe_rt.c') + ' -o ' + shq(rt_o_path) + ' -fPIC')
  if (rc != 0) {
    print('runtime stubs compilation failed')
    return
  }
}

// Step 6: link dylib
print('link...')
rc = os.system('cc -shared -fPIC -undefined dynamic_lookup ' + shq(o_path) + ' ' + shq(rt_o_path) + ' -o ' + shq(cwd + '/' + dylib_path))
if (rc != 0) {
  print('linking failed')
  return
}

print('built: ' + dylib_path)

296
docs/spec/c-runtime.md Normal file
View File

@@ -0,0 +1,296 @@
---
title: "C Runtime for Native Code"
description: "Minimum C runtime surface for QBE-generated native code"
---
## Overview
QBE-generated native code calls into a C runtime for anything that touches the heap, dispatches dynamically, or requires GC awareness. The design principle: **native code handles control flow and integer math directly; everything else is a runtime call.**
This document defines the runtime boundary — what must be in C, what QBE handles inline, and how to organize the C code to serve both the mcode interpreter and native code cleanly.
## The Boundary
### What native code does inline (no C calls)
These operations compile to straight QBE instructions with no runtime involvement:
- **Integer arithmetic**: `add`, `sub`, `mul` on NaN-boxed ints (shift right 1, operate, shift left 1)
- **Integer comparisons**: extract int with shift, compare, produce tagged bool
- **Control flow**: jumps, branches, labels, function entry/exit
- **Slot access**: load/store to frame slots via `%fp` + offset
- **NaN-box tagging**: integer tagging (`n << 1`), bool constants (`0x03`/`0x23`), null (`0x07`)
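- **Type tests**: `JS_IsInt` (LSB check), `JS_IsNumber`, `JS_IsText`, `JS_IsNull` — these are bit tests on the value, no heap access needed (note: `JS_IsText` and `JS_IsNumber` are inline-able but are currently emitted as C calls; see Tier 8)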
### What requires a C call
Anything that:
1. **Allocates** (arrays, records, strings, frames, function objects)
2. **Touches the heap** (property get/set, array indexing, closure access)
3. **Dispatches on type at runtime** (dynamic load/store, polymorphic arithmetic)
4. **Calls user functions** (frame setup, argument passing, invocation)
5. **Does string operations** (concatenation, comparison, conversion)
## Runtime Functions
### Tier 1: Essential (must exist for any program to run)
These are called by virtually every QBE program.
#### Intrinsic Lookup
```c
// Look up a built-in function by name. Called once per intrinsic per callsite.
JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name);
```
Maps name → C function pointer wrapped in JSValue. This is the primary entry point for all built-in functions (`print`, `text`, `length`, `is_array`, etc.). The native code never calls intrinsics directly — it always goes through `get_intrinsic` → `frame` → `invoke`.
#### Function Calls
```c
// Allocate a call frame with space for nr_args arguments.
JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int nr_args);
// Set argument idx in the frame.
void cell_rt_setarg(JSValue frame, int idx, JSValue val);
// Execute the function. Returns the result.
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame);
```
This is the universal calling convention. Every function call — user functions, intrinsics, methods — goes through frame/setarg/invoke. The frame allocates a `JSFrameRegister` on the GC heap, setarg fills slots, invoke dispatches.
**Tail call variants:**
```c
JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int nr_args);
void cell_rt_goinvoke(JSContext *ctx, JSValue frame);
```
Same as frame/invoke but reuse the caller's stack position.
### Tier 2: Property Access (needed by any program using records or arrays)
```c
// Record field by constant name.
JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name);
void cell_rt_store_field(JSContext *ctx, JSValue obj, JSValue val, const char *name);
// Array element by integer index.
JSValue cell_rt_load_index(JSContext *ctx, JSValue obj, JSValue idx);
void cell_rt_store_index(JSContext *ctx, JSValue obj, JSValue idx, JSValue val);
// Dynamic — type of key unknown at compile time.
JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key);
void cell_rt_store_dynamic(JSContext *ctx, JSValue obj, JSValue key, JSValue val);
```
The typed variants (`load_field`/`load_index`) skip the key-type dispatch that `load_dynamic` must do. When parse and fold provide type information, QBE emit selects the typed variant and the streamline optimizer can narrow dynamic → typed.
**Implementation**: These are thin wrappers around existing `JS_GetPropertyStr`/`JS_GetPropertyNumber`/`JS_GetProperty` and their `Set` counterparts.
### Tier 3: Closures (needed by programs with nested functions)
```c
// Walk depth levels up the frame chain, read slot.
JSValue cell_rt_get_closure(JSContext *ctx, JSValue fp, int depth, int slot);
// Walk depth levels up, write slot.
void cell_rt_put_closure(JSContext *ctx, JSValue fp, JSValue val, int depth, int slot);
```
Closure variables live in outer frames. `depth` is how many `caller` links to follow; `slot` is the register index in that frame.
### Tier 4: Object Construction (needed by programs creating arrays/records/functions)
```c
// Create a function object from a compiled function index.
// The native code loader must maintain a function table.
JSValue cell_rt_make_function(JSContext *ctx, int fn_id);
```
Array and record literals are currently compiled as intrinsic calls (`array(...)`, direct `{...}` construction) which go through the frame/invoke path. A future optimization could add:
```c
// Fast paths (optional, not yet needed)
JSValue cell_rt_new_array(JSContext *ctx, int len);
JSValue cell_rt_new_record(JSContext *ctx);
```
### Tier 5: Collection Operations
```c
// a[] = val (push) and var v = a[] (pop)
void cell_rt_push(JSContext *ctx, JSValue arr, JSValue val);
JSValue cell_rt_pop(JSContext *ctx, JSValue arr);
```
### Tier 6: Error Handling
```c
// Trigger disruption. Jumps to the disrupt handler or unwinds.
void cell_rt_disrupt(JSContext *ctx);
```
### Tier 7: Miscellaneous
```c
JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key);
JSValue cell_rt_typeof(JSContext *ctx, JSValue val);
```
### Tier 8: String and Float Helpers (called from QBE inline code, not from qbe_emit)
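These are called from the QBE IL that `qbe.cm` generates inline for arithmetic and comparison operations. Most are not `cell_rt_` prefixed — they're lower-level helpers; the tolerant-equality and text-ordering wrappers at the end of the list are the exception: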
```c
// Float arithmetic (when operands aren't both ints)
JSValue qbe_float_add(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_sub(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_mul(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_div(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_mod(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_pow(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_neg(JSContext *ctx, JSValue v);
JSValue qbe_float_inc(JSContext *ctx, JSValue v);
JSValue qbe_float_dec(JSContext *ctx, JSValue v);
// Float comparison (returns C int 0/1 for QBE branching)
int qbe_float_cmp(JSContext *ctx, int op, JSValue a, JSValue b);
// Bitwise ops on non-int values (convert to int32 first)
JSValue qbe_bnot(JSContext *ctx, JSValue v);
JSValue qbe_bitwise_and(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_bitwise_or(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_bitwise_xor(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_shift_shl(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_shift_sar(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b);
// String operations
JSValue JS_ConcatString(JSContext *ctx, JSValue a, JSValue b);
int js_string_compare_value(JSContext *ctx, JSValue a, JSValue b, int eq_only);
JSValue JS_NewString(JSContext *ctx, const char *str);
JSValue __JS_NewFloat64(JSContext *ctx, double d);
int JS_ToBool(JSContext *ctx, JSValue v);
// String/number type tests (inline-able but currently calls)
int JS_IsText(JSValue v);
int JS_IsNumber(JSValue v);
// Tolerant equality (== on mixed types)
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b);
// Text ordering comparisons
JSValue cell_rt_lt_text(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_le_text(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_gt_text(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b);
```
## What Exists vs What Needs Writing
### Already exists (in qbe_helpers.c)
All `qbe_float_*`, `qbe_bnot`, `qbe_bitwise_*`, `qbe_shift_*`, `qbe_to_bool` — these are implemented and working.
### Already exists (in runtime.c / quickjs.c) but not yet wrapped
The underlying operations exist but aren't exposed with the `cell_rt_` names:
| Runtime function | Underlying implementation |
|---|---|
| `cell_rt_load_field` | `JS_GetPropertyStr(ctx, obj, name)` |
| `cell_rt_load_index` | `JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx))` |
| `cell_rt_load_dynamic` | `JS_GetProperty(ctx, obj, key)` |
| `cell_rt_store_field` | `JS_SetPropertyStr(ctx, obj, name, val)` |
| `cell_rt_store_index` | `JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx), val)` |
| `cell_rt_store_dynamic` | `JS_SetProperty(ctx, obj, key, val)` |
| `cell_rt_delete` | `JS_DeleteProperty(ctx, obj, key)` |
| `cell_rt_push` | `JS_ArrayPush(ctx, &arr, val)` |
| `cell_rt_pop` | `JS_ArrayPop(ctx, arr)` |
| `cell_rt_typeof` | type tag switch → `JS_NewString` |
| `cell_rt_disrupt` | `JS_Throw(ctx, ...)` |
| `cell_rt_eq_tol` / `cell_rt_ne_tol` | comparison logic in mcode.c `eq_tol`/`ne_tol` handler |
| `cell_rt_lt_text` etc. | `js_string_compare_value` + wrap result |
### Needs new code
| Runtime function | What's needed |
|---|---|
| `cell_rt_get_intrinsic` | Look up intrinsic by name string, return JSValue function. Currently scattered across `js_cell_intrinsic_get` and the mcode handler. Needs a clean single entry point. |
| `cell_rt_frame` | Allocate `JSFrameRegister`, set function slot, set argc. Exists in mcode.c `frame` handler but not as a callable function. |
| `cell_rt_setarg` | Write to frame slot. Trivial: `frame->slots[idx + 1] = val` (slot 0 is `this`). |
| `cell_rt_invoke` | Call the function in the frame. Needs to dispatch: native C function vs mach bytecode vs mcode. This is the critical piece — it must handle all function types. |
| `cell_rt_goframe` / `cell_rt_goinvoke` | Tail call variants. Similar to frame/invoke but reuse caller frame. |
| `cell_rt_make_function` | Create function object from index. Needs a function table (populated by the native loader). |
| `cell_rt_get_closure` / `cell_rt_put_closure` | Walk frame chain. Exists inline in mcode.c `get`/`put` handlers. |
## Recommended C File Organization
```
source/
cell_runtime.c — NEW: all cell_rt_* functions (the native code API)
qbe_helpers.c — existing: float/bitwise/shift helpers for inline QBE
runtime.c — existing: JS_GetProperty, JS_SetProperty, etc.
quickjs.c — existing: core VM, GC, value representation
mcode.c — existing: mcode interpreter (can delegate to cell_runtime.c)
```
**`cell_runtime.c`** is the single file that defines the native code contract. It should:
1. Include `quickjs-internal.h` for access to value representation and heap types
2. Export all `cell_rt_*` functions with C linkage (no `static`)
3. Keep each function thin — delegate to existing `JS_*` functions where possible
4. Handle GC safety: after any allocation (frame, string, array), callers' frames may have moved
### Implementation Priority
**Phase 1** — Get "hello world" running natively:
- `cell_rt_get_intrinsic` (to find `print` and `text`)
- `cell_rt_frame`, `cell_rt_setarg`, `cell_rt_invoke` (to call them)
- A loader that takes QBE output → assembles → links → calls `cell_main`
**Phase 2** — Variables and arithmetic:
- All property access (`load_field`, `load_index`, `store_*`, `load_dynamic`)
- `cell_rt_make_function`, `cell_rt_get_closure`, `cell_rt_put_closure`
**Phase 3** — Full language:
- `cell_rt_push`, `cell_rt_pop`, `cell_rt_delete`, `cell_rt_typeof`
- `cell_rt_disrupt`
- `cell_rt_goframe`, `cell_rt_goinvoke`
- Text comparison wrappers (`cell_rt_lt_text`, etc.)
- Tolerant equality (`cell_rt_eq_tol`, `cell_rt_ne_tol`)
## Calling Convention
All `cell_rt_*` functions follow the same pattern:
- First argument is `JSContext *ctx` (the one exception is `cell_rt_setarg`, which takes only the frame, index, and value)
- Values are passed/returned as `JSValue` (64-bit, by value)
- Frame pointers are `JSValue` (tagged pointer to `JSFrameRegister`)
- String names are `const char *` (pointer to data section label)
- Integer constants (slot indices, arg counts) are `int` / `long`
Native code maintains `%ctx` (JSContext) and `%fp` (current frame pointer) as persistent values across the function body. All slot reads/writes go through `%fp` + offset.
## What Should NOT Be in the C Runtime
These are handled entirely by QBE-generated code:
- **Integer arithmetic and comparisons** — bit operations on NaN-boxed values
- **Control flow** — branches, loops, labels, jumps
- **Boolean logic** — `and`/`or`/`not` on tagged values
- **Constant loading** — integer constants are immediate, strings are data labels
- **Type guard branches** — the `is_int`/`is_text`/`is_null` checks are inline bit tests; the branch to the float or text path is just a QBE `jnz`
The `qbe.cm` macros already handle all of this. The arithmetic path looks like:
```
check both ints? → yes → inline int add → done
→ no → call qbe_float_add (or JS_ConcatString for text)
```
The C runtime is only called on the slow paths (float, text, dynamic dispatch). The fast path (integer arithmetic, comparisons, branching) is fully native.

View File

@@ -10,12 +10,11 @@ Mcode is a JSON-based intermediate representation that can be interpreted direct
## Pipeline
```
Source → Tokenize → Parse (AST) → Mcode (JSON) → Interpret
→ Compile to Mach (planned)
→ Compile to native (planned)
Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Interpret
→ QBE → Native
```
Mcode is produced by the `JS_Mcode` compiler pass, which emits a cJSON tree. The mcode interpreter walks this tree directly, dispatching on instruction name strings.
Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result can be interpreted by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview.
## JSMCode Structure
@@ -44,16 +43,37 @@ struct JSMCode {
## Instruction Format
Each instruction is a JSON array. The first element is the instruction name (string), followed by operands:
Each instruction is a JSON array. The first element is the instruction name (string), followed by operands (typically `[op, dest, ...args, line, col]`):
```json
["LOADK", 0, 42]
["ADD", 2, 0, 1]
["JMPFALSE", 3, "else_label"]
["CALL", 0, 2, 1]
["access", 3, 5, 1, 9]
["load_index", 10, 4, 9, 5, 11]
["store_dynamic", 4, 11, 12, 6, 3]
["frame", 15, 14, 1, 7, 7]
["setarg", 15, 0, 16, 7, 7]
["invoke", 15, 13, 7, 7]
```
The instruction set mirrors the Mach VM opcodes — same operations, same register semantics, but with string dispatch instead of numeric opcodes.
### Typed Load/Store
Memory operations come in typed variants for optimization:
- `load_index dest, obj, idx` — array element by integer index
- `load_field dest, obj, key` — record property by string key
- `load_dynamic dest, obj, key` — unknown; dispatches at runtime
- `store_index obj, val, idx` — array element store
- `store_field obj, val, key` — record property store
- `store_dynamic obj, val, key` — unknown; dispatches at runtime
The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold.
### Decomposed Calls
Function calls are split into separate instructions:
- `frame dest, fn, argc` — allocate call frame
- `setarg frame, idx, val` — set argument
- `invoke frame, result` — execute the call
## Labels

118
docs/spec/pipeline.md Normal file
View File

@@ -0,0 +1,118 @@
---
title: "Compilation Pipeline"
description: "Overview of the compilation stages and optimizations"
---
## Overview
The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental).
```
Source → Tokenize → Parse → Fold → Mach VM (default)
→ Mcode → Streamline → Mcode Interpreter
→ QBE → Native
```
## Stages
### Tokenize (`tokenize.cm`)
Splits source text into tokens. Handles string interpolation by re-tokenizing template literal contents. Produces a token array with position information (line, column).
### Parse (`parse.cm`)
Converts tokens into an AST. Also performs semantic analysis:
- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`.
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`, `"null"`.
- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`.
- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for numeric subscripts, `"field"` for string subscripts, omitted otherwise.
- **Tail position**: Return statements where the expression is a call get `tail: true`.
### Fold (`fold.cm`)
Operates on the AST. Performs constant folding and type analysis:
- **Constant folding**: Evaluates arithmetic on known constants at compile time (e.g., `5 + 10` becomes `15`).
- **Constant propagation**: Tracks `def` bindings whose values are known constants.
- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites.
- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where x is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`.
- **Purity marking**: Stamps `pure: true` on expressions with no side effects (literals, name references, arithmetic on pure operands).
- **Dead code elimination**: Removes unreachable branches when conditions are known constants.
### Mcode (`mcode.cm`)
Lowers the AST to a JSON-based intermediate representation with explicit operations. Key design principle: **every type check is an explicit instruction** so downstream optimizers can see and eliminate them.
- **Typed load/store**: Emits `load_index` (array by integer), `load_field` (record by string), or `load_dynamic` (unknown) based on type information from fold.
- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call).
- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup.
See [Mcode IR](mcode.md) for instruction format details.
### Streamline (`streamline.cm`)
Optimizes the Mcode IR. Operates per-function:
- **Redundant instruction elimination**: Removes no-op patterns and redundant moves.
- **Dead code removal**: Eliminates instructions whose results are never used.
- **Type-based narrowing**: When type information is available, narrows `load_dynamic`/`store_dynamic` to typed variants.
### QBE Emit (`qbe_emit.cm`)
Lowers optimized Mcode IR to QBE intermediate language for native code compilation. Each Mcode function becomes a QBE function that calls into the cell runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.).
String constants are interned in a data section. Integer constants are NaN-boxed inline.
### QBE Macros (`qbe.cm`)
Provides operation implementations as QBE IL templates. Each arithmetic, comparison, and type operation is defined as a function that emits the corresponding QBE instructions, handling type dispatch (integer, float, text paths) with proper guard checks.
## Execution Backends
### Mach VM (default)
Binary 32-bit register VM. Used for production execution and bootstrapping.
```
./cell script.ce
```
### Mcode Interpreter
JSON-based interpreter. Used for debugging the compilation pipeline.
```
./cell --mcode script.ce
```
### QBE Native (experimental)
Generates QBE IL that can be compiled to native code.
```
./cell --emit-qbe script.ce > output.ssa
```
## Files
| File | Role |
|------|------|
| `tokenize.cm` | Lexer |
| `parse.cm` | Parser + semantic analysis |
| `fold.cm` | Constant folding + type analysis |
| `mcode.cm` | AST → Mcode IR lowering |
| `streamline.cm` | Mcode IR optimizer |
| `qbe_emit.cm` | Mcode IR → QBE IL emitter |
| `qbe.cm` | QBE IL operation templates |
| `internal/bootstrap.cm` | Pipeline orchestrator |
## Test Files
| File | Tests |
|------|-------|
| `parse_test.ce` | Type tags, access_kind, intrinsic resolution |
| `fold_test.ce` | Type propagation, purity, intrinsic hints |
| `mcode_test.ce` | Typed load/store, decomposed calls |
| `streamline_test.ce` | Optimization counts, IR before/after |
| `qbe_test.ce` | End-to-end QBE IL generation |

47
fold.cm
View File

@@ -158,6 +158,7 @@ var fold = function(ast) {
var name = null
var sv = null
var item = null
var rhs_target = null
while (i < length(stmts)) {
stmt = stmts[i]
kind = stmt.kind
@@ -169,6 +170,19 @@ var fold = function(ast) {
register_const(fn_nr, name, stmt.right)
}
}
if (name != null && stmt.right != null && stmt.right.kind == "(") {
rhs_target = stmt.right.expression
if (rhs_target != null && rhs_target.intrinsic == true) {
sv = scope_var(fn_nr, name)
if (sv != null && sv.type_tag == null) {
if (rhs_target.name == "array") sv.type_tag = "array"
else if (rhs_target.name == "record") sv.type_tag = "record"
else if (rhs_target.name == "text") sv.type_tag = "text"
else if (rhs_target.name == "number") sv.type_tag = "number"
else if (rhs_target.name == "blob") sv.type_tag = "blob"
}
}
}
} else if (kind == "function") {
name = stmt.name
if (name != null && stmt.arity != null) {
@@ -320,6 +334,8 @@ var fold = function(ast) {
var ar = null
var akey = null
var tv = null
var att = null
var arg = null
// Recurse into children first (bottom-up)
if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" ||
@@ -385,6 +401,10 @@ var fold = function(ast) {
return copy_loc(expr, {kind: lit.kind, value: lit.value, number: lit.number})
}
}
sv = scope_var(fn_nr, expr.name)
if (sv != null && sv.type_tag != null) {
expr.type_tag = sv.type_tag
}
return expr
}
@@ -497,7 +517,7 @@ var fold = function(ast) {
return expr
}
// Call: stamp arity
// Call: stamp arity and fold intrinsic type checks
if (k == "(") {
target = expr.expression
if (target != null && target.kind == "name" && target.level == 0) {
@@ -506,6 +526,30 @@ var fold = function(ast) {
if (fn_arities[akey] != null) ar = fn_arities[akey][target.name]
if (ar != null) expr.arity = ar
}
if (target != null && target.intrinsic == true && length(expr.list) == 1) {
arg = expr.list[0]
att = null
if (arg.type_tag != null) {
att = arg.type_tag
} else if (arg.kind == "name" && arg.level == 0) {
sv = scope_var(fn_nr, arg.name)
if (sv != null) att = sv.type_tag
}
if (att != null) {
if (target.name == "is_array") return make_bool(att == "array", expr)
if (target.name == "is_text") return make_bool(att == "text", expr)
if (target.name == "is_number") return make_bool(att == "number" || att == "integer", expr)
if (target.name == "is_integer") return make_bool(att == "integer", expr)
if (target.name == "is_function") return make_bool(att == "function", expr)
if (target.name == "is_logical") return make_bool(att == "logical", expr)
if (target.name == "is_null") return make_bool(att == "null", expr)
if (target.name == "is_object") return make_bool(att == "record", expr)
if (target.name == "length") {
if (att == "array") expr.hint = "array_length"
else if (att == "text") expr.hint = "text_length"
}
}
}
return expr
}
@@ -525,6 +569,7 @@ var fold = function(ast) {
if (k == "var" || k == "def") {
stmt.right = fold_expr(stmt.right, fn_nr)
if (is_pure(stmt.right)) stmt.pure = true
return stmt
}
if (k == "var_list") {

BIN
fold.mach

Binary file not shown.

View File

@@ -41,11 +41,17 @@ var boot_env = {use: use_basic}
var tokenize_mod = boot_load("tokenize", boot_env)
var parse_mod = boot_load("parse", boot_env)
var fold_mod = boot_load("fold", boot_env)
use_cache['tokenize'] = tokenize_mod
use_cache['parse'] = parse_mod
use_cache['fold'] = fold_mod
// Optionally load mcode compiler module
var mcode_mod = null
var streamline_mod = null
var qbe_emit_mod = null
if (use_mcode) {
mcode_mod = boot_load("mcode", boot_env)
use_cache['mcode'] = mcode_mod
}
// Warn if any .cm source is newer than its .mach bytecode
@@ -55,6 +61,9 @@ function check_mach_stale() {
["parse.cm", "parse.mach"],
["fold.cm", "fold.mach"],
["mcode.cm", "mcode.mach"],
["streamline.cm", "streamline.mach"],
["qbe.cm", "qbe.mach"],
["qbe_emit.cm", "qbe_emit.mach"],
["internal/bootstrap.cm", "internal/bootstrap.mach"],
["internal/engine.cm", "internal/engine.mach"]
]
@@ -118,26 +127,78 @@ function analyze(src, filename) {
return ast
}
// Resolve a core module by name. A precompiled <core_path>/<name>.mach is
// preferred and loaded with mach_load; when it is absent, the matching .cm
// source is read, passed through analyze(), and the resulting AST is
// evaluated with mach_eval_ast.
function load_module(name, env) {
  var bytecode_path = core_path + '/' + name + ".mach"
  var source_path = null
  var source_text = null
  var tree = null
  if (!fd.is_file(bytecode_path)) {
    // No bytecode on disk: compile from source instead.
    source_path = core_path + '/' + name + ".cm"
    source_text = text(fd.slurp(source_path))
    tree = analyze(source_text, source_path)
    return mach_eval_ast(name, json.encode(tree), env)
  }
  return mach_load(fd.slurp(bytecode_path), env)
}
// Load optimization pipeline modules (needs analyze to be defined)
var qbe_macros = null
if (use_mcode) {
streamline_mod = load_module("streamline", boot_env)
use_cache['streamline'] = streamline_mod
if (emit_qbe) {
qbe_macros = load_module("qbe", boot_env)
qbe_emit_mod = load_module("qbe_emit", boot_env)
use_cache['qbe'] = qbe_macros
use_cache['qbe_emit'] = qbe_emit_mod
}
}
// Run AST through either mcode or mach pipeline
function run_ast(name, ast, env) {
var compiled = null
var optimized = null
var qbe_il = null
if (use_mcode) {
compiled = mcode_mod(ast)
return mcode_run(name, json.encode(compiled), env)
optimized = streamline_mod(compiled)
if (emit_qbe) {
qbe_il = qbe_emit_mod(optimized, qbe_macros)
print(qbe_il)
return null
}
return mcode_run(name, json.encode(optimized), env)
}
return mach_eval_ast(name, json.encode(ast), env)
}
// use() with ƿit pipeline for .cm modules
function use_fn(path) {
var file_path = path + '.cm'
var file_path = null
var mach_path = null
var data = null
var script = null
var ast = null
var result = null
if (use_cache[path])
return use_cache[path]
// Check CWD first, then core_path
// Try .mach bytecode first (CWD then core_path)
mach_path = path + '.mach'
if (!fd.is_file(mach_path))
mach_path = core_path + '/' + path + '.mach'
if (fd.is_file(mach_path)) {
data = fd.slurp(mach_path)
result = mach_load(data, {use: use_fn})
use_cache[path] = result
return result
}
// Try .cm source (CWD then core_path)
file_path = path + '.cm'
if (!fd.is_file(file_path))
file_path = core_path + '/' + path + '.cm'

View File

@@ -2,10 +2,12 @@ var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var filename = args[0]
var src = text(fd.slurp(filename))
var result = tokenize(src, filename)
var ast = parse(result.tokens, src, filename)
var compiled = mcode(ast)
var ast = parse(result.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
print(json.encode(compiled))

199
mcode.cm
View File

@@ -51,6 +51,13 @@ var mcode = function(ast) {
var s_cur_col = 0
var s_filename = null
// Shared closure vars for binop helpers (avoids >4 param functions)
var _bp_dest = 0
var _bp_left = 0
var _bp_right = 0
var _bp_ln = null
var _bp_rn = null
// State save/restore for nested function compilation
var save_state = function() {
return {
@@ -260,15 +267,19 @@ var mcode = function(ast) {
}
// emit_add_decomposed: int path -> text path -> float path -> disrupt
var emit_add_decomposed = function(dest, left, right, left_node, right_node) {
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_add_decomposed = function() {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(left_node)
var left_is_text = is_known_text(left_node)
var left_is_num = is_known_number(left_node)
var right_is_int = is_known_int(right_node)
var right_is_text = is_known_text(right_node)
var right_is_num = is_known_number(right_node)
var left_is_int = is_known_int(_bp_ln)
var left_is_text = is_known_text(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_text = is_known_text(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var not_int = null
var not_text = null
var done = null
@@ -346,13 +357,17 @@ var mcode = function(ast) {
}
// emit_numeric_binop: int path -> float path -> disrupt
var emit_numeric_binop = function(int_op, float_op, dest, left, right, left_node, right_node) {
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_numeric_binop = function(int_op, float_op) {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(left_node)
var left_is_num = is_known_number(left_node)
var right_is_int = is_known_int(right_node)
var right_is_num = is_known_number(right_node)
var left_is_int = is_known_int(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var not_int = null
var done = null
var err = null
@@ -404,7 +419,11 @@ var mcode = function(ast) {
}
// emit_eq_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(false)
var emit_eq_decomposed = function(dest, left, right, left_node, right_node) {
// reads _bp_dest, _bp_left, _bp_right from closure
var emit_eq_decomposed = function() {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var done = gen_label("eq_done")
@@ -472,7 +491,11 @@ var mcode = function(ast) {
}
// emit_ne_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(true)
var emit_ne_decomposed = function(dest, left, right, left_node, right_node) {
// reads _bp_dest, _bp_left, _bp_right from closure
var emit_ne_decomposed = function() {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var done = gen_label("ne_done")
@@ -549,15 +572,19 @@ var mcode = function(ast) {
}
// emit_relational: int -> float -> text -> disrupt
var emit_relational = function(int_op, float_op, text_op, dest, left, right, left_node, right_node) {
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_relational = function(int_op, float_op, text_op) {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(left_node)
var left_is_num = is_known_number(left_node)
var left_is_text = is_known_text(left_node)
var right_is_int = is_known_int(right_node)
var right_is_num = is_known_number(right_node)
var right_is_text = is_known_text(right_node)
var left_is_int = is_known_int(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var left_is_text = is_known_text(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var right_is_text = is_known_text(_bp_rn)
var not_int = null
var not_num = null
var done = null
@@ -654,29 +681,33 @@ var mcode = function(ast) {
}
// Central router: maps op string to decomposition helper
var emit_binop = function(op_str, dest, left, right, left_node, right_node) {
// Sets _bp_* closure vars then calls helper with reduced args
var emit_binop = function(op_str, dest, left, right) {
_bp_dest = dest
_bp_left = left
_bp_right = right
if (op_str == "add") {
emit_add_decomposed(dest, left, right, left_node, right_node)
emit_add_decomposed()
} else if (op_str == "subtract") {
emit_numeric_binop("sub_int", "sub_float", dest, left, right, left_node, right_node)
emit_numeric_binop("sub_int", "sub_float")
} else if (op_str == "multiply") {
emit_numeric_binop("mul_int", "mul_float", dest, left, right, left_node, right_node)
emit_numeric_binop("mul_int", "mul_float")
} else if (op_str == "divide") {
emit_numeric_binop("div_int", "div_float", dest, left, right, left_node, right_node)
emit_numeric_binop("div_int", "div_float")
} else if (op_str == "modulo") {
emit_numeric_binop("mod_int", "mod_float", dest, left, right, left_node, right_node)
emit_numeric_binop("mod_int", "mod_float")
} else if (op_str == "eq") {
emit_eq_decomposed(dest, left, right, left_node, right_node)
emit_eq_decomposed()
} else if (op_str == "ne") {
emit_ne_decomposed(dest, left, right, left_node, right_node)
emit_ne_decomposed()
} else if (op_str == "lt") {
emit_relational("lt_int", "lt_float", "lt_text", dest, left, right, left_node, right_node)
emit_relational("lt_int", "lt_float", "lt_text")
} else if (op_str == "le") {
emit_relational("le_int", "le_float", "le_text", dest, left, right, left_node, right_node)
emit_relational("le_int", "le_float", "le_text")
} else if (op_str == "gt") {
emit_relational("gt_int", "gt_float", "gt_text", dest, left, right, left_node, right_node)
emit_relational("gt_int", "gt_float", "gt_text")
} else if (op_str == "ge") {
emit_relational("ge_int", "ge_float", "ge_text", dest, left, right, left_node, right_node)
emit_relational("ge_int", "ge_float", "ge_text")
} else {
// Passthrough for bitwise, pow, in, etc.
emit_3(op_str, dest, left, right)
@@ -685,19 +716,31 @@ var mcode = function(ast) {
}
var emit_get_prop = function(dest, obj, prop) {
add_instr(["load", dest, obj, prop])
add_instr(["load_field", dest, obj, prop])
}
var emit_set_prop = function(obj, prop, val) {
add_instr(["store", obj, val, prop])
add_instr(["store_field", obj, val, prop])
}
var emit_get_elem = function(dest, obj, idx) {
emit_3("load", dest, obj, idx)
var emit_get_elem = function(dest, obj, idx, access_kind) {
if (access_kind == "index") {
emit_3("load_index", dest, obj, idx)
} else if (access_kind == "field") {
emit_3("load_field", dest, obj, idx)
} else {
emit_3("load_dynamic", dest, obj, idx)
}
}
var emit_set_elem = function(obj, idx, val) {
emit_3("store", obj, val, idx)
var emit_set_elem = function(obj, idx, val, access_kind) {
if (access_kind == "index") {
emit_3("store_index", obj, val, idx)
} else if (access_kind == "field") {
emit_3("store_field", obj, val, idx)
} else {
emit_3("store_dynamic", obj, val, idx)
}
}
var emit_call = function(dest, func_slot, args) {
@@ -718,23 +761,37 @@ var mcode = function(ast) {
}
var emit_call_method = function(dest, obj, prop, args) {
var instr = ["callmethod", dest, obj, prop]
var method_slot = alloc_slot()
add_instr(["load_field", method_slot, obj, prop])
var argc = length(args)
var frame_slot = alloc_slot()
emit_3("frame", frame_slot, method_slot, argc)
emit_3("setarg", frame_slot, 0, obj)
var arg_idx = 1
var _i = 0
while (_i < length(args)) {
push(instr, args[_i])
while (_i < argc) {
emit_3("setarg", frame_slot, arg_idx, args[_i])
arg_idx = arg_idx + 1
_i = _i + 1
}
add_instr(instr)
emit_2("invoke", frame_slot, dest)
}
var emit_call_method_dyn = function(dest, obj, key_reg, args) {
var instr = ["callmethod_dyn", dest, obj, key_reg]
var method_slot = alloc_slot()
emit_3("load_dynamic", method_slot, obj, key_reg)
var argc = length(args)
var frame_slot = alloc_slot()
emit_3("frame", frame_slot, method_slot, argc)
emit_3("setarg", frame_slot, 0, obj)
var arg_idx = 1
var _i = 0
while (_i < length(args)) {
push(instr, args[_i])
while (_i < argc) {
emit_3("setarg", frame_slot, arg_idx, args[_i])
arg_idx = arg_idx + 1
_i = _i + 1
}
add_instr(instr)
emit_2("invoke", frame_slot, dest)
}
var emit_go_call = function(func_slot, args) {
@@ -920,7 +977,9 @@ var mcode = function(ast) {
if (op == null) {
op = "add"
}
emit_binop(op, dest, left_slot, right_slot, left, right)
_bp_ln = left
_bp_rn = right
emit_binop(op, dest, left_slot, right_slot)
return dest
}
@@ -972,7 +1031,9 @@ var mcode = function(ast) {
}
right_slot = gen_expr(right, -1)
dest = alloc_slot()
emit_binop(op, dest, left_slot, right_slot, null, right)
_bp_ln = null
_bp_rn = right
emit_binop(op, dest, left_slot, right_slot)
if (level == 0) {
local = find_var(name)
if (local >= 0) {
@@ -995,7 +1056,9 @@ var mcode = function(ast) {
emit_get_prop(old_val, obj_slot, prop)
right_slot = gen_expr(right, -1)
dest = alloc_slot()
emit_binop(op, dest, old_val, right_slot, null, right)
_bp_ln = null
_bp_rn = right
emit_binop(op, dest, old_val, right_slot)
emit_set_prop(obj_slot, prop, dest)
return dest
} else if (left_kind == "[") {
@@ -1004,11 +1067,13 @@ var mcode = function(ast) {
obj_slot = gen_expr(obj, -1)
idx_slot = gen_expr(idx_expr, -1)
old_val = alloc_slot()
emit_get_elem(old_val, obj_slot, idx_slot)
emit_get_elem(old_val, obj_slot, idx_slot, left.access_kind)
right_slot = gen_expr(right, -1)
dest = alloc_slot()
emit_binop(op, dest, old_val, right_slot, null, right)
emit_set_elem(obj_slot, idx_slot, dest)
_bp_ln = null
_bp_rn = right
emit_binop(op, dest, old_val, right_slot)
emit_set_elem(obj_slot, idx_slot, dest, left.access_kind)
return dest
}
return -1
@@ -1081,7 +1146,7 @@ var mcode = function(ast) {
idx_expr = left.right
obj_slot = gen_expr(obj, -1)
idx_slot = gen_expr(idx_expr, -1)
emit_set_elem(obj_slot, idx_slot, val_slot)
emit_set_elem(obj_slot, idx_slot, val_slot, left.access_kind)
}
return val_slot
}
@@ -1301,7 +1366,7 @@ var mcode = function(ast) {
obj_slot = gen_expr(obj, -1)
idx_slot = gen_expr(idx, -1)
slot = alloc_slot()
emit_get_elem(slot, obj_slot, idx_slot)
emit_get_elem(slot, obj_slot, idx_slot, expr.access_kind)
return slot
}
@@ -1357,7 +1422,9 @@ var mcode = function(ast) {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
d = alloc_slot()
emit_binop(mop, d, a0, a1, args_list[0], args_list[1])
_bp_ln = args_list[0]
_bp_rn = args_list[1]
emit_binop(mop, d, a0, a1)
return d
}
@@ -1442,7 +1509,9 @@ var mcode = function(ast) {
emit_access_intrinsic(old_slot, name)
}
new_slot = alloc_slot()
emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node)
_bp_ln = null
_bp_rn = one_node
emit_binop(arith_op, new_slot, old_slot, one_slot)
if (level == 0) {
local = find_var(name)
if (local >= 0) {
@@ -1462,7 +1531,9 @@ var mcode = function(ast) {
old_slot = alloc_slot()
emit_get_prop(old_slot, obj_slot, prop)
new_slot = alloc_slot()
emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node)
_bp_ln = null
_bp_rn = one_node
emit_binop(arith_op, new_slot, old_slot, one_slot)
emit_set_prop(obj_slot, prop, new_slot)
return postfix ? old_slot : new_slot
} else if (operand_kind == "[") {
@@ -1471,10 +1542,12 @@ var mcode = function(ast) {
obj_slot = gen_expr(obj, -1)
idx_slot = gen_expr(idx_expr, -1)
old_slot = alloc_slot()
emit_get_elem(old_slot, obj_slot, idx_slot)
emit_get_elem(old_slot, obj_slot, idx_slot, operand.access_kind)
new_slot = alloc_slot()
emit_binop(arith_op, new_slot, old_slot, one_slot, null, one_node)
emit_set_elem(obj_slot, idx_slot, new_slot)
_bp_ln = null
_bp_rn = one_node
emit_binop(arith_op, new_slot, old_slot, one_slot)
emit_set_elem(obj_slot, idx_slot, new_slot, operand.access_kind)
return postfix ? old_slot : new_slot
}
}
@@ -1911,7 +1984,9 @@ var mcode = function(ast) {
case_expr = case_node.expression
case_val = gen_expr(case_expr, -1)
cmp_slot = alloc_slot()
emit_binop("eq", cmp_slot, switch_val, case_val, null, case_expr)
_bp_ln = null
_bp_rn = case_expr
emit_binop("eq", cmp_slot, switch_val, case_val)
emit_jump_cond("jump_true", cmp_slot, case_label)
push(case_labels, case_label)
}

Binary file not shown.

View File

@@ -1,8 +1,9 @@
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var filename = args[0]
var src = text(fd.slurp(filename))
var result = tokenize(src, filename)
var ast = parse(result.tokens, src, filename)
var ast = parse(result.tokens, src, filename, tokenize)
print(json.encode(ast))

View File

@@ -1493,6 +1493,22 @@ var parse = function(tokens, src, filename, tokenizer) {
return functino_names[name] == true
}
// derive_type_tag: map a literal expression node to a static type tag.
// Returns "array", "record", "function", "text", "integer", "number",
// "logical" or "null"; returns null when expr is null or its kind is not
// one of the literal kinds handled here.
var derive_type_tag = function(expr) {
  var node_kind = null
  var tag = null
  if (expr == null) return null
  node_kind = expr.kind
  if (node_kind == "array" || node_kind == "record" || node_kind == "function") {
    // for these kinds the tag is spelled exactly like the kind
    tag = node_kind
  } else if (node_kind == "text" || node_kind == "text literal") {
    tag = "text"
  } else if (node_kind == "number") {
    // integer-valued literals get the narrower tag
    tag = is_integer(expr.number) ? "integer" : "number"
  } else if (node_kind == "true" || node_kind == "false") {
    tag = "logical"
  } else if (node_kind == "null") {
    tag = "null"
  }
  return tag
}
var _assign_kinds = {
assign: true, "+=": true, "-=": true, "*=": true, "/=": true, "%=": true,
"<<=": true, ">>=": true, ">>>=": true,
@@ -1517,7 +1533,8 @@ var parse = function(tokens, src, filename, tokenizer) {
function_nr: v.function_nr,
nr_uses: v.nr_uses,
closure: v.closure == 1,
level: 0
level: 0,
type_tag: v.type_tag
}
slots = slots + 1
if (v.closure) close_slots = close_slots + 1
@@ -1650,13 +1667,26 @@ var parse = function(tokens, src, filename, tokenizer) {
return null
}
if (kind == "[") {
sem_check_expr(scope, expr.left)
sem_check_expr(scope, expr.right)
if (expr.right != null) {
if (expr.right.kind == "number" && is_integer(expr.right.number)) {
expr.access_kind = "index"
} else if (expr.right.kind == "text") {
expr.access_kind = "field"
}
}
return null
}
if (kind == "," || kind == "+" || kind == "-" || kind == "*" ||
kind == "/" || kind == "%" || kind == "==" || kind == "!=" ||
kind == "<" || kind == ">" || kind == "<=" || kind == ">=" ||
kind == "&&" || kind == "||" || kind == "&" ||
kind == "|" || kind == "^" || kind == "<<" || kind == ">>" ||
kind == ">>>" || kind == "**" || kind == "in" ||
kind == "." || kind == "[") {
kind == ".") {
sem_check_expr(scope, expr.left)
sem_check_expr(scope, expr.right)
return null
@@ -1765,6 +1795,7 @@ var parse = function(tokens, src, filename, tokenizer) {
if (r.level > 0) r.v.closure = 1
} else {
expr.level = -1
expr.intrinsic = true
sem_add_intrinsic(name)
}
}
@@ -1788,6 +1819,7 @@ var parse = function(tokens, src, filename, tokenizer) {
var pname = null
var def_val = null
var sr = null
var tt = null
if (kind == "var_list") {
i = 0
@@ -1827,6 +1859,13 @@ var parse = function(tokens, src, filename, tokenizer) {
}
}
sem_check_expr(scope, stmt.right)
if (name != null) {
tt = derive_type_tag(stmt.right)
if (tt != null) {
existing = sem_find_var(scope, name)
if (existing != null) existing.type_tag = tt
}
}
return null
}
@@ -1904,6 +1943,9 @@ var parse = function(tokens, src, filename, tokenizer) {
if (kind == "return" || kind == "go") {
sem_check_expr(scope, stmt.expression)
if (stmt.expression != null && stmt.expression.kind == "(") {
stmt.tail = true
}
return null
}

18
qbe.ce Normal file
View File

@@ -0,0 +1,18 @@
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
var qbe_macros = use("qbe")
var qbe_emit = use("qbe_emit")
// Pipeline: source text -> tokens -> AST -> folded AST -> mcode IR
// -> streamlined IR -> QBE IL, which is printed.
var filename = args[0]
var src = text(fd.slurp(filename))
var tokens = tokenize(src, filename).tokens
// the tokenizer itself is handed to parse as its fourth argument
var ir = mcode(fold(parse(tokens, src, filename, tokenize)))
print(qbe_emit(streamline(ir), qbe_macros))

97
qbe.cm
View File

@@ -13,6 +13,11 @@ def js_true = 35
def js_exception = 15
def js_empty_text = 27
// Shared closure vars for functions with >4 params
var _qop = null
var _qop2 = null
var _qflags = null
def int32_min = -2147483648
def int32_max = 2147483647
def mantissa_mask = 4503599627370495
@@ -398,18 +403,20 @@ var mod = function(p, ctx, a, b) {
// ============================================================
// Helper: generate comparison for a given op string and int comparison QBE op
// null_true: whether null==null returns true (eq, le, ge) or false (ne, lt, gt)
var cmp = function(p, ctx, a, b, int_cmp_op, float_cmp_op_id, is_eq, is_ne, null_true) {
// reads _qflags = {int_cmp_op, float_id, is_eq, is_ne, null_true} from closure
var cmp = function(p, ctx, a, b) {
var int_cmp_op = _qflags.int_cmp_op
var float_cmp_op_id = _qflags.float_id
var eq_only = 0
if (is_eq || is_ne) {
var mismatch_val = js_false
var null_val = js_false
if (_qflags.is_eq || _qflags.is_ne) {
eq_only = 1
}
var mismatch_val = js_false
if (is_ne) {
if (_qflags.is_ne) {
mismatch_val = js_true
}
var null_val = js_false
if (null_true) {
if (_qflags.null_true) {
null_val = js_true
}
return `@${p}.start
@@ -485,27 +492,32 @@ var cmp = function(p, ctx, a, b, int_cmp_op, float_cmp_op_id, is_eq, is_ne, null
// MACH_EQ=0, NEQ=1, LT=2, LE=3, GT=4, GE=5
// null_true: eq, le, ge return true for null==null; ne, lt, gt return false
var eq = function(p, ctx, a, b) {
return cmp(p, ctx, a, b, "ceqw", 0, true, false, true)
_qflags = {int_cmp_op: "ceqw", float_id: 0, is_eq: true, is_ne: false, null_true: true}
return cmp(p, ctx, a, b)
}
var ne = function(p, ctx, a, b) {
return cmp(p, ctx, a, b, "cnew", 1, false, true, false)
_qflags = {int_cmp_op: "cnew", float_id: 1, is_eq: false, is_ne: true, null_true: false}
return cmp(p, ctx, a, b)
}
var lt = function(p, ctx, a, b) {
return cmp(p, ctx, a, b, "csltw", 2, false, false, false)
_qflags = {int_cmp_op: "csltw", float_id: 2, is_eq: false, is_ne: false, null_true: false}
return cmp(p, ctx, a, b)
}
var le = function(p, ctx, a, b) {
return cmp(p, ctx, a, b, "cslew", 3, false, false, true)
_qflags = {int_cmp_op: "cslew", float_id: 3, is_eq: false, is_ne: false, null_true: true}
return cmp(p, ctx, a, b)
}
var gt = function(p, ctx, a, b) {
return cmp(p, ctx, a, b, "csgtw", 4, false, false, false)
_qflags = {int_cmp_op: "csgtw", float_id: 4, is_eq: false, is_ne: false, null_true: false}
return cmp(p, ctx, a, b)
}
var ge = function(p, ctx, a, b) {
return cmp(p, ctx, a, b, "csgew", 5, false, false, true)
_qflags = {int_cmp_op: "csgew", float_id: 5, is_eq: false, is_ne: false, null_true: true}
// cmp reads _qflags from the closure; its result must be returned, as eq/ne/lt/le/gt do
return cmp(p, ctx, a, b)
}
// ============================================================
@@ -627,7 +639,9 @@ var bnot = function(p, ctx, v) {
// Both operands must be numeric. Int fast path, float -> convert to int32.
// ============================================================
var bitwise_op = function(p, ctx, a, b, qbe_op) {
// reads _qop from closure
var bitwise_op = function(p, ctx, a, b) {
var qbe_op = _qop
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
@@ -654,19 +668,24 @@ var bitwise_op = function(p, ctx, a, b, qbe_op) {
}
var band = function(p, ctx, a, b) {
return bitwise_op(p, ctx, a, b, "and")
_qop = "and"
return bitwise_op(p, ctx, a, b)
}
var bor = function(p, ctx, a, b) {
return bitwise_op(p, ctx, a, b, "or")
_qop = "or"
return bitwise_op(p, ctx, a, b)
}
var bxor = function(p, ctx, a, b) {
return bitwise_op(p, ctx, a, b, "xor")
_qop = "xor"
return bitwise_op(p, ctx, a, b)
}
// Shift ops: mask shift amount to 5 bits (& 31)
var shift_op = function(p, ctx, a, b, qbe_op) {
// reads _qop from closure
var shift_op = function(p, ctx, a, b) {
var qbe_op = _qop
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
@@ -694,15 +713,18 @@ var shift_op = function(p, ctx, a, b, qbe_op) {
}
var shl = function(p, ctx, a, b) {
return shift_op(p, ctx, a, b, "shl")
_qop = "shl"
return shift_op(p, ctx, a, b)
}
var shr = function(p, ctx, a, b) {
return shift_op(p, ctx, a, b, "sar")
_qop = "sar"
return shift_op(p, ctx, a, b)
}
var ushr = function(p, ctx, a, b) {
return shift_op(p, ctx, a, b, "shr")
_qop = "shr"
return shift_op(p, ctx, a, b)
}
// ============================================================
@@ -898,7 +920,9 @@ var gt_int = function(p, ctx, a, b) { return cmp_int(p, a, b, "csgtw") }
var ge_int = function(p, ctx, a, b) { return cmp_int(p, a, b, "csgew") }
// --- Comparisons (float path) ---
var cmp_float = function(p, ctx, a, b, op_id) {
// reads _qop from closure (op_id)
var cmp_float = function(p, ctx, a, b) {
var op_id = _qop
return ` %${p}.fcr =w call $qbe_float_cmp(l ${ctx}, w ${op_id}, l ${a}, l ${b})
%${p}.fcrext =l extuw %${p}.fcr
%${p}.fsh =l shl %${p}.fcrext, 5
@@ -906,15 +930,18 @@ var cmp_float = function(p, ctx, a, b, op_id) {
`
}
var eq_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 0) }
var ne_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 1) }
var lt_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 2) }
var le_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 3) }
var gt_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 4) }
var ge_float = function(p, ctx, a, b) { return cmp_float(p, ctx, a, b, 5) }
var eq_float = function(p, ctx, a, b) { _qop = 0; return cmp_float(p, ctx, a, b) }
var ne_float = function(p, ctx, a, b) { _qop = 1; return cmp_float(p, ctx, a, b) }
var lt_float = function(p, ctx, a, b) { _qop = 2; return cmp_float(p, ctx, a, b) }
var le_float = function(p, ctx, a, b) { _qop = 3; return cmp_float(p, ctx, a, b) }
var gt_float = function(p, ctx, a, b) { _qop = 4; return cmp_float(p, ctx, a, b) }
var ge_float = function(p, ctx, a, b) { _qop = 5; return cmp_float(p, ctx, a, b) }
// --- Comparisons (text path) ---
var cmp_text = function(p, ctx, a, b, qbe_op, eq_only) {
// reads _qop (qbe_op) and _qop2 (eq_only) from closure
var cmp_text = function(p, ctx, a, b) {
var qbe_op = _qop
var eq_only = _qop2
return ` %${p}.scmp =w call $js_string_compare_value(l ${ctx}, l ${a}, l ${b}, w ${eq_only})
%${p}.tcr =w ${qbe_op} %${p}.scmp, 0
%${p}.tcrext =l extuw %${p}.tcr
@@ -923,12 +950,12 @@ var cmp_text = function(p, ctx, a, b, qbe_op, eq_only) {
`
}
var eq_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "ceqw", 1) }
var ne_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "cnew", 1) }
var lt_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csltw", 0) }
var le_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "cslew", 0) }
var gt_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csgtw", 0) }
var ge_text = function(p, ctx, a, b) { return cmp_text(p, ctx, a, b, "csgew", 0) }
var eq_text = function(p, ctx, a, b) { _qop = "ceqw"; _qop2 = 1; return cmp_text(p, ctx, a, b) }
var ne_text = function(p, ctx, a, b) { _qop = "cnew"; _qop2 = 1; return cmp_text(p, ctx, a, b) }
var lt_text = function(p, ctx, a, b) { _qop = "csltw"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
var le_text = function(p, ctx, a, b) { _qop = "cslew"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
var gt_text = function(p, ctx, a, b) { _qop = "csgtw"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
var ge_text = function(p, ctx, a, b) { _qop = "csgew"; _qop2 = 0; return cmp_text(p, ctx, a, b) }
// --- Comparisons (bool path) ---
var eq_bool = function(p, a, b) {

BIN
qbe.mach Normal file

Binary file not shown.

667
qbe_emit.cm Normal file
View File

@@ -0,0 +1,667 @@
// qbe_emit.cm — mcode IR → QBE IL compiler
// Takes mcode IR (from mcode.cm) and uses qbe.cm macros to produce
// a complete QBE IL program ready for the qbe compiler.
// qbe module is passed via env as 'qbe'
var qbe_emit = function(ir, qbe) {
var out = []
var data_out = []
var str_table = {}
var str_id = 0
var uid = 0
// ============================================================
// Output helpers
// ============================================================
// emit: append one chunk of IL text `s` to the `out` list.
var emit = function(s) {
push(out, s)
}
// fresh: bump the shared uid counter and return a new name "u<N>",
// used as a unique prefix for temporaries and labels.
var fresh = function() {
  var next = uid + 1
  uid = next
  return `u${text(next)}`
}
// s: name of the SSA variable that holds frame slot n ("%s<n>").
var s = function(n) {
  return `%s${text(n)}`
}
// sanitize: rewrite a label/name for use as an identifier in the emitted IL.
// ".", "-", " " and "/" become "_"; "<", ">", "(" and ")" are removed.
var sanitize = function(lbl) {
  // separators -> underscore
  var clean = replace(replace(replace(replace(lbl, ".", "_"), "-", "_"), " ", "_"), "/", "_")
  // brackets are dropped entirely
  clean = replace(replace(replace(replace(clean, "<", ""), ">", ""), "(", ""), ")", "")
  return clean
}
// ============================================================
// String interning — emit data section entries
// ============================================================
// intern_str: return the data label for string `val`, creating it on first use.
// A new string gets label "$d_str_<id>", a NUL-terminated `data` line pushed
// onto data_out, and an entry in str_table so later requests reuse the label.
var intern_str = function(val) {
  var known = str_table[val]
  if (known != null) return known
  var label = "$d_str_" + text(str_id)
  // escape backslashes first, then double quotes
  var escaped = replace(replace(val, "\\", "\\\\"), "\"", "\\\"")
  str_id = str_id + 1
  push(data_out, `data ${label} = { b "${escaped}", b 0 }`)
  str_table[val] = label
  return label
}
// ============================================================
// Extract property name from mcode operand
// ============================================================
// prop_name: pull a property name out of an mcode operand.
// Text operands are returned as-is; object operands yield their `name`,
// or failing that their `value`; anything else yields null.
var prop_name = function(a) {
  var found = null
  if (is_text(a)) {
    found = a
  } else if (is_object(a)) {
    if (a.name != null) {
      found = a.name
    } else if (a.value != null) {
      found = a.value
    }
  }
  return found
}
// ============================================================
// Compile one function's instructions
// ============================================================
var compile_fn = function(fn, fn_idx, is_main) {
var instrs = fn.instructions
var nr_slots = fn.nr_slots
var nr_args = fn.nr_args
var name = is_main ? "cell_main" : "cell_fn_" + text(fn_idx)
name = sanitize(name)
var i = 0
var instr = null
var op = null
var a1 = null
var a2 = null
var a3 = null
var a4 = null
var p = null
var pn = null
var sl = null
var fop_id = 0
// Function signature: (ctx, frame_ptr) → JSValue
emit(`export function l $${name}(l %ctx, l %fp) {`)
emit("@entry")
// Load all slots from frame into SSA variables
// Each slot is a JSValue (8 bytes) at fp + slot*8
var off = 0
i = 0
while (i < nr_slots) {
off = i * 8
emit(` %p${text(i)} =l add %fp, ${text(off)}`)
emit(` ${s(i)} =l loadl %p${text(i)}`)
i = i + 1
}
// Walk instructions
i = 0
while (i < length(instrs)) {
instr = instrs[i]
i = i + 1
// Labels are plain strings
if (is_text(instr)) {
emit("@" + sanitize(instr))
continue
}
op = instr[0]
a1 = instr[1]
a2 = instr[2]
a3 = instr[3]
// --- Constants ---
if (op == "int") {
emit(` ${s(a1)} =l copy ${text(a2 * 2)}`)
continue
}
if (op == "null") {
emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`)
continue
}
if (op == "true") {
emit(` ${s(a1)} =l copy ${text(qbe.js_true)}`)
continue
}
if (op == "false") {
emit(` ${s(a1)} =l copy ${text(qbe.js_false)}`)
continue
}
if (op == "access") {
if (is_number(a2)) {
if (is_integer(a2)) {
emit(` ${s(a1)} =l copy ${text(a2 * 2)}`)
} else {
emit(` ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2)})`)
}
} else if (is_text(a2)) {
sl = intern_str(a2)
emit(` ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`)
} else if (is_object(a2)) {
if (a2.make == "intrinsic") {
sl = intern_str(a2.name)
emit(` ${s(a1)} =l call $cell_rt_get_intrinsic(l %ctx, l ${sl})`)
} else if (a2.kind == "number") {
if (a2.number != null && is_integer(a2.number)) {
emit(` ${s(a1)} =l copy ${text(a2.number * 2)}`)
} else if (a2.number != null) {
emit(` ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2.number)})`)
} else {
emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`)
}
} else if (a2.kind == "text") {
sl = intern_str(a2.value)
emit(` ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`)
} else if (a2.kind == "true") {
emit(` ${s(a1)} =l copy ${text(qbe.js_true)}`)
} else if (a2.kind == "false") {
emit(` ${s(a1)} =l copy ${text(qbe.js_false)}`)
} else if (a2.kind == "null") {
emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`)
} else {
emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`)
}
} else {
emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`)
}
continue
}
// --- Movement ---
if (op == "move") {
emit(` ${s(a1)} =l copy ${s(a2)}`)
continue
}
// --- Arithmetic (int path) — use qbe.cm macros ---
if (op == "add_int") {
p = fresh()
emit(qbe.add_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "sub_int") {
p = fresh()
emit(qbe.sub_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "mul_int") {
p = fresh()
emit(qbe.mul_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "div_int") {
p = fresh()
emit(qbe.div_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "mod_int") {
p = fresh()
emit(qbe.mod_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
// --- Arithmetic (float path) ---
if (op == "add_float") {
p = fresh()
emit(qbe.add_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "sub_float") {
p = fresh()
emit(qbe.sub_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "mul_float") {
p = fresh()
emit(qbe.mul_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "div_float") {
p = fresh()
emit(qbe.div_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "mod_float") {
p = fresh()
emit(qbe.mod_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
// --- String concat ---
if (op == "concat") {
p = fresh()
emit(qbe.concat(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
// --- Type checks — use qbe.cm macros ---
if (op == "is_int") {
p = fresh()
emit(qbe.is_int(p, s(a2)))
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
continue
}
if (op == "is_text") {
p = fresh()
emit(qbe.is_imm_text(p, s(a2)))
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
continue
}
if (op == "is_num") {
p = fresh()
emit(qbe.is_number(p, s(a2)))
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
continue
}
if (op == "is_bool") {
p = fresh()
emit(qbe.is_bool(p, s(a2)))
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
continue
}
if (op == "is_null") {
p = fresh()
emit(qbe.is_null(p, s(a2)))
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
continue
}
if (op == "is_identical") {
p = fresh()
emit(qbe.is_identical(p, s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
// --- Comparisons (int path) ---
if (op == "eq_int") {
p = fresh()
emit(qbe.eq_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "ne_int") {
p = fresh()
emit(qbe.ne_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "lt_int") {
p = fresh()
emit(qbe.lt_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "gt_int") {
p = fresh()
emit(qbe.gt_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "le_int") {
p = fresh()
emit(qbe.le_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "ge_int") {
p = fresh()
emit(qbe.ge_int(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
// --- Comparisons (float/text/bool) ---
if (op == "eq_float") {
p = fresh()
emit(qbe.eq_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "ne_float") {
p = fresh()
emit(qbe.ne_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "lt_float" || op == "gt_float" || op == "le_float" || op == "ge_float") {
p = fresh()
fop_id = 0
if (op == "lt_float") fop_id = 2
else if (op == "le_float") fop_id = 3
else if (op == "gt_float") fop_id = 4
else if (op == "ge_float") fop_id = 5
emit(qbe.cmp_float != null ? cmp_float(p, "%ctx", s(a2), s(a3), fop_id) : ` %${p} =l call $qbe_float_cmp(l %ctx, w ${text(fop_id)}, l ${s(a2)}, l ${s(a3)})`)
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "eq_text") {
p = fresh()
emit(qbe.eq_text(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "ne_text") {
p = fresh()
emit(qbe.ne_text(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "lt_text" || op == "gt_text" || op == "le_text" || op == "ge_text") {
p = fresh()
emit(` ${s(a1)} =l call $cell_rt_${op}(l %ctx, l ${s(a2)}, l ${s(a3)})`)
continue
}
if (op == "eq_bool") {
p = fresh()
emit(qbe.eq_bool(p, s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "ne_bool") {
p = fresh()
emit(qbe.ne_bool(p, s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "eq_tol" || op == "ne_tol") {
emit(` ${s(a1)} =l call $cell_rt_${op}(l %ctx, l ${s(a2)}, l ${s(a3)})`)
continue
}
// --- Boolean ops ---
if (op == "not") {
p = fresh()
emit(qbe.lnot(p, "%ctx", s(a2)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "and") {
emit(` ${s(a1)} =l and ${s(a2)}, ${s(a3)}`)
continue
}
if (op == "or") {
emit(` ${s(a1)} =l or ${s(a2)}, ${s(a3)}`)
continue
}
// --- Bitwise ops — use qbe.cm macros ---
if (op == "bitnot") {
p = fresh()
emit(qbe.bnot(p, "%ctx", s(a2)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "bitand") {
p = fresh()
emit(qbe.band(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "bitor") {
p = fresh()
emit(qbe.bor(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "bitxor") {
p = fresh()
emit(qbe.bxor(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "shl") {
p = fresh()
emit(qbe.shl(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "shr") {
p = fresh()
emit(qbe.shr(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
if (op == "ushr") {
p = fresh()
emit(qbe.ushr(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
continue
}
// --- Property access — runtime calls ---
if (op == "load_field") {
pn = prop_name(a3)
if (pn != null) {
sl = intern_str(pn)
emit(` ${s(a1)} =l call $cell_rt_load_field(l %ctx, l ${s(a2)}, l ${sl})`)
} else {
emit(` ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`)
}
continue
}
if (op == "load_index") {
emit(` ${s(a1)} =l call $cell_rt_load_index(l %ctx, l ${s(a2)}, l ${s(a3)})`)
continue
}
if (op == "load_dynamic") {
emit(` ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`)
continue
}
if (op == "store_field") {
pn = prop_name(a3)
if (pn != null) {
sl = intern_str(pn)
emit(` call $cell_rt_store_field(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${sl})`)
} else {
emit(` call $cell_rt_store_dynamic(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`)
}
continue
}
if (op == "store_index") {
emit(` call $cell_rt_store_index(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`)
continue
}
if (op == "store_dynamic") {
emit(` call $cell_rt_store_dynamic(l %ctx, l ${s(a1)}, l ${s(a2)}, l ${s(a3)})`)
continue
}
// --- Closure access ---
if (op == "get") {
emit(` ${s(a1)} =l call $cell_rt_get_closure(l %ctx, l %fp, l ${text(a2)}, l ${text(a3)})`)
continue
}
if (op == "put") {
emit(` call $cell_rt_put_closure(l %ctx, l %fp, l ${s(a1)}, l ${text(a2)}, l ${text(a3)})`)
continue
}
// --- Control flow ---
if (op == "jump") {
emit(` jmp @${sanitize(a1)}`)
continue
}
if (op == "jump_true") {
p = fresh()
emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`)
emit(`@${p}_f`)
continue
}
if (op == "jump_false") {
p = fresh()
emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`)
emit(`@${p}_t`)
continue
}
if (op == "jump_null") {
p = fresh()
emit(` %${p} =w ceql ${s(a1)}, ${text(qbe.js_null)}`)
emit(` jnz %${p}, @${sanitize(a2)}, @${p}_nn`)
emit(`@${p}_nn`)
continue
}
if (op == "jump_not_null") {
p = fresh()
emit(` %${p} =w cnel ${s(a1)}, ${text(qbe.js_null)}`)
emit(` jnz %${p}, @${sanitize(a2)}, @${p}_n`)
emit(`@${p}_n`)
continue
}
if (op == "wary_true") {
p = fresh()
emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
emit(` jnz %${p}, @${sanitize(a2)}, @${p}_f`)
emit(`@${p}_f`)
continue
}
if (op == "wary_false") {
p = fresh()
emit(` %${p} =w call $JS_ToBool(l %ctx, l ${s(a1)})`)
emit(` jnz %${p}, @${p}_t, @${sanitize(a2)}`)
emit(`@${p}_t`)
continue
}
// --- Function calls ---
if (op == "frame") {
emit(` ${s(a1)} =l call $cell_rt_frame(l %ctx, l ${s(a2)}, l ${text(a3)})`)
continue
}
if (op == "setarg") {
emit(` call $cell_rt_setarg(l ${s(a1)}, l ${text(a2)}, l ${s(a3)})`)
continue
}
if (op == "invoke") {
emit(` ${s(a2)} =l call $cell_rt_invoke(l %ctx, l ${s(a1)})`)
continue
}
if (op == "goframe") {
emit(` ${s(a1)} =l call $cell_rt_goframe(l %ctx, l ${s(a2)}, l ${text(a3)})`)
continue
}
if (op == "goinvoke") {
emit(` call $cell_rt_goinvoke(l %ctx, l ${s(a1)})`)
continue
}
// --- Function object creation ---
if (op == "function") {
emit(` ${s(a1)} =l call $cell_rt_make_function(l %ctx, l ${text(a2)})`)
continue
}
// --- Array push/pop ---
if (op == "push") {
emit(` call $cell_rt_push(l %ctx, l ${s(a1)}, l ${s(a2)})`)
continue
}
if (op == "pop") {
emit(` ${s(a1)} =l call $cell_rt_pop(l %ctx, l ${s(a2)})`)
continue
}
// --- Misc ---
if (op == "return") {
emit(` ret ${s(a1)}`)
continue
}
if (op == "disrupt") {
emit(` call $cell_rt_disrupt(l %ctx)`)
emit(` ret ${text(qbe.js_null)}`)
continue
}
if (op == "delete") {
emit(` ${s(a1)} =l call $cell_rt_delete(l %ctx, l ${s(a2)}, l ${s(a3)})`)
continue
}
if (op == "typeof") {
emit(` ${s(a1)} =l call $cell_rt_typeof(l %ctx, l ${s(a2)})`)
continue
}
// --- Unknown opcode ---
emit(` # unknown: ${op}`)
}
emit("}")
emit("")
}
// ============================================================
// Main: compile all functions then main
// ============================================================
var fi = 0
while (fi < length(ir.functions)) {
compile_fn(ir.functions[fi], fi, false)
fi = fi + 1
}
compile_fn(ir.main, -1, true)
// Assemble: data section first, then function bodies
var result = []
var di = 0
while (di < length(data_out)) {
push(result, data_out[di])
di = di + 1
}
if (length(data_out) > 0) push(result, "")
di = 0
while (di < length(out)) {
push(result, out[di])
di = di + 1
}
return text(result, "\n")
}
return qbe_emit

BIN
qbe_emit.mach Normal file

Binary file not shown.

154
qbe_rt.c Normal file
View File

@@ -0,0 +1,154 @@
/*
* qbe_rt.c - Runtime support for QBE-compiled ƿit modules
*
* Provides non-inline versions of static-inline quickjs functions
* (which QBE-generated code calls as external symbols) and stub
* implementations of cell_rt_* helper functions.
*/
#include <stdint.h>
#include <string.h>
#include <math.h>
typedef uint64_t JSValue;
typedef struct JSContext JSContext;
#define JS_TAG_SHORT_FLOAT 5
#define JS_TAG_NULL 7
#define JS_VAL_NULL 7
/* ============================================================
Non-inline wrappers for static-inline quickjs functions
============================================================ */
/*
 * __JS_NewFloat64 — encode a double as a tagged JSValue.
 *
 * Short-float layout: [sign:1][exp:8][mantissa:52][tag:3]
 *
 * The checks run in this order (first match wins):
 *   - +0.0 / -0.0                  -> short-float zero, sign dropped
 *   - NaN / infinity               -> JS_VAL_NULL
 *   - subnormal                    -> short-float zero carrying the sign bit
 *   - rebiased exponent not 1..254 -> JS_VAL_NULL
 *   - exact int32 value            -> tagged int (value shifted left by one)
 *   - anything else                -> short float
 *
 * ctx is not used.
 */
JSValue __JS_NewFloat64(JSContext *ctx, double d) {
  uint64_t bits;
  uint64_t sign_bit;
  uint64_t frac;
  int exp11;
  int exp8;
  int32_t as_int;

  /* Reinterpret the double as its raw 64-bit pattern. */
  memcpy(&bits, &d, sizeof bits);
  sign_bit = bits & (1ULL << 63);
  exp11 = (int)((bits >> 52) & 0x7FF);
  frac = bits & ((1ULL << 52) - 1);

  if (exp11 == 0) {
    /* Zero loses its sign; a subnormal is flushed to zero but keeps it. */
    if (frac == 0)
      return JS_TAG_SHORT_FLOAT;
    return sign_bit | JS_TAG_SHORT_FLOAT;
  }
  if (exp11 == 0x7FF)
    return JS_VAL_NULL;

  /* Rebias the 11-bit exponent (bias 1023) to 8 bits (bias 127). */
  exp8 = exp11 - 1023 + 127;
  if (exp8 < 1 || exp8 > 254)
    return JS_VAL_NULL;

  /* Prefer the integer encoding when the value round-trips through int32. */
  if (d >= -2147483648.0 && d <= 2147483647.0) {
    as_int = (int32_t)d;
    if ((double)as_int == d)
      return (uint64_t)(uint32_t)as_int << 1;
  }

  return sign_bit | ((uint64_t)exp8 << 55) | (frac << 3) | JS_TAG_SHORT_FLOAT;
}
/*
 * JS_IsNumber — nonzero when v is a tagged int (low bit clear) or a
 * short float (low three bits equal JS_TAG_SHORT_FLOAT), zero otherwise.
 */
int JS_IsNumber(JSValue v) {
  if ((v & 1) == 0)
    return 1;
  return (v & 7) == JS_TAG_SHORT_FLOAT;
}
/*
 * JS_NewString — create a string value from a NUL-terminated C string.
 * Measures the string with strlen and forwards to JS_NewStringLen.
 */
extern JSValue JS_NewStringLen(JSContext *ctx, const char *str, size_t len);
JSValue JS_NewString(JSContext *ctx, const char *str) {
  size_t len = strlen(str);
  return JS_NewStringLen(ctx, str, len);
}
/* ============================================================
cell_rt_* stubs — error/fallback paths for QBE-compiled code
These are called from type-mismatch branches that should not
be reached in pure numeric code.
============================================================ */
extern JSValue JS_ThrowTypeError(JSContext *ctx, const char *fmt, ...);
/*
 * cell_rt_disrupt — raise a TypeError on ctx with a fixed message.
 * The value returned by JS_ThrowTypeError is discarded; the function
 * itself returns nothing.
 */
void cell_rt_disrupt(JSContext *ctx) {
JS_ThrowTypeError(ctx, "type error in native code");
}
/*
 * Ordered text comparisons (lt/gt/le/ge) and tolerance equality
 * (eq_tol/ne_tol).  Stubs: each one ignores its arguments and returns
 * JS_VAL_NULL.
 */
JSValue cell_rt_lt_text(JSContext *ctx, JSValue a, JSValue b) {
return JS_VAL_NULL;
}
JSValue cell_rt_gt_text(JSContext *ctx, JSValue a, JSValue b) {
return JS_VAL_NULL;
}
JSValue cell_rt_le_text(JSContext *ctx, JSValue a, JSValue b) {
return JS_VAL_NULL;
}
JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b) {
return JS_VAL_NULL;
}
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b) {
return JS_VAL_NULL;
}
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b) {
return JS_VAL_NULL;
}
/*
 * Intrinsic lookup and property access.  Stubs: the load functions ignore
 * their arguments and return JS_VAL_NULL; the store functions have empty
 * bodies.  The *_field variants take the property name as a C string, the
 * *_dynamic / *_index variants take the key or index as a JSValue.
 */
JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) {
return JS_VAL_NULL;
}
JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) {
return JS_VAL_NULL;
}
JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) {
return JS_VAL_NULL;
}
JSValue cell_rt_load_index(JSContext *ctx, JSValue arr, JSValue idx) {
return JS_VAL_NULL;
}
/* Store stubs: the stored value comes first, then the container. */
void cell_rt_store_field(JSContext *ctx, JSValue val, JSValue obj,
const char *name) {}
void cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj,
JSValue key) {}
void cell_rt_store_index(JSContext *ctx, JSValue val, JSValue arr,
JSValue idx) {}
/*
 * Closure access, call frames, function objects, array push/pop, delete
 * and typeof.  Stubs: every value-returning function ignores its arguments
 * and returns JS_VAL_NULL; every void function has an empty body.
 */
JSValue cell_rt_get_closure(JSContext *ctx, void *fp, int64_t depth,
int64_t index) {
return JS_VAL_NULL;
}
void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth,
int64_t index) {}
JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) {
return JS_VAL_NULL;
}
/* Note: setarg is the only helper here that takes no JSContext. */
void cell_rt_setarg(JSValue frame, int64_t idx, JSValue val) {}
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame) { return JS_VAL_NULL; }
JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int64_t nargs) {
return JS_VAL_NULL;
}
void cell_rt_goinvoke(JSContext *ctx, JSValue frame) {}
JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx) {
return JS_VAL_NULL;
}
void cell_rt_push(JSContext *ctx, JSValue arr, JSValue val) {}
JSValue cell_rt_pop(JSContext *ctx, JSValue arr) { return JS_VAL_NULL; }
JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key) {
return JS_VAL_NULL;
}
JSValue cell_rt_typeof(JSContext *ctx, JSValue val) { return JS_VAL_NULL; }

View File

@@ -12,6 +12,9 @@ var files = [
{src: "parse.cm", name: "parse", out: "parse.mach"},
{src: "fold.cm", name: "fold", out: "fold.mach"},
{src: "mcode.cm", name: "mcode", out: "mcode.mach"},
{src: "streamline.cm", name: "streamline", out: "streamline.mach"},
{src: "qbe.cm", name: "qbe", out: "qbe.mach"},
{src: "qbe_emit.cm", name: "qbe_emit", out: "qbe_emit.mach"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"},
{src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"}
]

69
run_native.ce Normal file
View File

@@ -0,0 +1,69 @@
// run_native.ce — load a module both interpreted and native, compare speed
//
// Usage:
// cell --core . run_native.ce <module>
//
// Loads <module>.cm via use() (interpreted) and <module>.dylib (native),
// runs both and compares results and timing.
var os = use('os')
if (length(args) < 1) {
print('usage: cell --core . run_native.ce <module>')
print(' e.g. cell --core . run_native.ce num_torture')
return
}
var name = args[0]
if (ends_with(name, '.cm')) {
name = text(name, 0, length(name) - 3)
}
var safe = replace(replace(name, '/', '_'), '-', '_')
var symbol = 'js_' + safe + '_use'
var dylib_path = './' + name + '.dylib'
var fd = use('fd')
// --- Interpreted run ---
print('--- interpreted ---')
var t1 = os.now()
var result_interp = use(name)
var t2 = os.now()
var ms_interp = (t2 - t1) / 1000000
print('result: ' + text(result_interp))
print('time: ' + text(ms_interp) + ' ms')
// --- Native run ---
if (!fd.is_file(dylib_path)) {
print('\nno ' + dylib_path + ' found — run compile.ce first')
return
}
print('\n--- native ---')
var t3 = os.now()
var lib = os.dylib_open(dylib_path)
var t4 = os.now()
var result_native = os.dylib_symbol(lib, symbol)
var t5 = os.now()
var ms_load = (t4 - t3) / 1000000
var ms_exec = (t5 - t4) / 1000000
var ms_native = (t5 - t3) / 1000000
print('result: ' + text(result_native))
print('load: ' + text(ms_load) + ' ms')
print('exec: ' + text(ms_exec) + ' ms')
print('total: ' + text(ms_native) + ' ms')
// --- Comparison ---
print('\n--- comparison ---')
var match = result_interp == result_native
var speedup = 0
var speedup_exec = 0
print('match: ' + text(match))
if (ms_native > 0) {
speedup = ms_interp / ms_native
print('speedup: ' + text(speedup) + 'x (total)')
}
if (ms_exec > 0) {
speedup_exec = ms_interp / ms_exec
print('speedup: ' + text(speedup_exec) + 'x (exec only)')
}

View File

@@ -323,6 +323,7 @@ int cell_init(int argc, char **argv)
/* Default: run script through bootstrap pipeline */
int use_mcode = 0;
int emit_qbe = 0;
int arg_start = 1;
const char *shop_override = NULL;
const char *core_override = NULL;
@@ -332,6 +333,10 @@ int cell_init(int argc, char **argv)
if (strcmp(argv[arg_start], "--mcode") == 0) {
use_mcode = 1;
arg_start++;
} else if (strcmp(argv[arg_start], "--emit-qbe") == 0) {
use_mcode = 1; // QBE requires mcode pipeline
emit_qbe = 1;
arg_start++;
} else if (strcmp(argv[arg_start], "--shop") == 0) {
if (arg_start + 1 >= argc) {
printf("ERROR: --shop requires a path argument\n");
@@ -416,6 +421,7 @@ int cell_init(int argc, char **argv)
JS_SetPropertyStr(ctx, hidden_env, "shop_path",
shop_path ? JS_NewString(ctx, shop_path) : JS_NULL);
JS_SetPropertyStr(ctx, hidden_env, "use_mcode", JS_NewBool(ctx, use_mcode));
JS_SetPropertyStr(ctx, hidden_env, "emit_qbe", JS_NewBool(ctx, emit_qbe));
JS_SetPropertyStr(ctx, hidden_env, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val));
JS_SetPropertyStr(ctx, hidden_env, "json", js_json_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "nota", js_nota_use(ctx));

15
streamline.ce Normal file
View File

@@ -0,0 +1,15 @@
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
var filename = args[0]
var src = text(fd.slurp(filename))
var result = tokenize(src, filename)
var ast = parse(result.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
var optimized = streamline(compiled)
print(json.encode(optimized))

351
streamline.cm Normal file
View File

@@ -0,0 +1,351 @@
// streamline.cm — mcode IR optimizer
// Single forward pass: type inference + strength reduction
var streamline = function(ir) {
// Type tags recorded per slot during the forward pass
var T_UNKNOWN = "unknown"
var T_INT = "int"
var T_FLOAT = "float"
var T_NUM = "num"
var T_TEXT = "text"
var T_BOOL = "bool"
var T_NULL = "null"

// Ops whose destination always holds an integer
var int_result_ops = {
  add_int: true,
  sub_int: true,
  mul_int: true,
  div_int: true,
  mod_int: true
}

// Ops whose destination always holds a float
var float_result_ops = {
  add_float: true,
  sub_float: true,
  mul_float: true,
  div_float: true,
  mod_float: true
}

// Ops whose destination always holds a bool:
// typed comparisons, logical ops and type tests
var bool_result_ops = {
  eq_int: true, ne_int: true,
  lt_int: true, gt_int: true,
  le_int: true, ge_int: true,

  eq_float: true, ne_float: true,
  lt_float: true, gt_float: true,
  le_float: true, ge_float: true,

  eq_text: true, ne_text: true,
  lt_text: true, gt_text: true,
  le_text: true, ge_text: true,

  eq_bool: true, ne_bool: true,
  eq_tol: true, ne_tol: true,

  not: true, and: true, or: true,

  is_int: true, is_text: true, is_num: true,
  is_bool: true, is_null: true, is_identical: true
}

// Type-test opcode -> the type tag it verifies
var type_check_map = {
  is_int: T_INT,
  is_text: T_TEXT,
  is_num: T_NUM,
  is_bool: T_BOOL,
  is_null: T_NULL
}
// Classify the literal carried by an "access" instruction:
// text -> T_TEXT, integer number -> T_INT, other number -> T_FLOAT,
// anything else -> T_UNKNOWN.
var access_value_type = function(val) {
  if (is_text(val)) {
    return T_TEXT
  }
  if (!is_number(val)) {
    return T_UNKNOWN
  }
  if (is_integer(val)) {
    return T_INT
  }
  return T_FLOAT
}
// Update slot_types for an instruction (shared tracking logic).
//
// slot_types maps text(slot) -> type tag and is mutated in place.
// instr is an instruction array: instr[0] is the opcode, instr[1] the
// destination slot (instr[2] for "invoke"). Opcodes not listed here leave
// slot_types untouched. Always returns null.
var track_types = function(slot_types, instr) {
  var op = instr[0]
  var src_type = null
  // Type to record for instr[1]; null means "this op is not tracked"
  var dest_type = null

  // invoke is the one op whose result lands in instr[2]
  if (op == "invoke") {
    slot_types[text(instr[2])] = T_UNKNOWN
    return null
  }

  if (op == "access") {
    dest_type = access_value_type(instr[2])
  } else if (op == "int") {
    dest_type = T_INT
  } else if (op == "true" || op == "false") {
    dest_type = T_BOOL
  } else if (op == "null") {
    dest_type = T_NULL
  } else if (op == "move") {
    // Copy the source's type when one is recorded, otherwise forget
    src_type = slot_types[text(instr[2])]
    if (src_type != null) {
      dest_type = src_type
    } else {
      dest_type = T_UNKNOWN
    }
  } else if (int_result_ops[op] == true || op == "neg_int") {
    dest_type = T_INT
  } else if (float_result_ops[op] == true || op == "neg_float") {
    dest_type = T_FLOAT
  } else if (op == "concat" || op == "typeof") {
    dest_type = T_TEXT
  } else if (bool_result_ops[op] == true) {
    dest_type = T_BOOL
  } else if (op == "bitnot" || op == "bitand" || op == "bitor" ||
             op == "bitxor" || op == "shl" || op == "shr" || op == "ushr") {
    dest_type = T_INT
  } else if (op == "load_field" || op == "load_index" || op == "load_dynamic" ||
             op == "pop" || op == "get" || op == "function") {
    dest_type = T_UNKNOWN
  } else if (op == "frame" || op == "goframe" || op == "delete") {
    // These also write instr[1]. Without this reset a reused slot would
    // keep the type of its previous contents and a later type check on it
    // could be folded away incorrectly.
    dest_type = T_UNKNOWN
  }

  if (dest_type != null) {
    slot_types[text(instr[1])] = dest_type
  }
  return null
}
// True when the type recorded for `slot` satisfies `typ`.
// A slot with nothing recorded never matches; a recorded int or float
// also satisfies T_NUM.
var slot_is = function(slot_types, slot, typ) {
  var recorded = slot_types[text(slot)]
  var numeric = false
  if (recorded == null) {
    return false
  }
  numeric = recorded == T_INT || recorded == T_FLOAT
  if (recorded == typ || (typ == T_NUM && numeric)) {
    return true
  }
  return false
}
// Optimize a single function's instructions, in place.
//
// func.instructions is a list whose entries are either text (labels and
// "_nop_N" markers) or instruction arrays [op, ...operands]. Returns null.
//
// Pass 1 walks forward tracking slot types and
//   - folds "is_<type> t, x" followed by "jump_false/jump_true t, label"
//     when the type of x is already known,
//   - rewrites load_dynamic/store_dynamic to the field/index form when the
//     key slot is known to be text/int.
// Pass 2 removes jumps whose target label is the next real item.
var optimize_function = function(func) {
  var instructions = func.instructions
  var num_instr = 0
  var slot_types = null
  var nop_counter = 0
  var i = 0
  var instr = null
  var op = null
  var dest = 0
  var src = 0
  var checked_type = null
  var next = null
  var next_op = null
  var target_label = null
  var is_match = false
  var is_mismatch = false
  var j = 0
  var peek = null

  // Replace instructions[idx] with a fresh, uniquely numbered nop marker
  var nop_out = function(idx) {
    nop_counter = nop_counter + 1
    instructions[idx] = "_nop_" + text(nop_counter)
    return null
  }

  // Turn the conditional jump at idx into an unconditional jump to label,
  // carrying over the jump's last two entries unchanged
  var force_jump = function(idx, label) {
    var cond = instructions[idx]
    var n = length(cond)
    instructions[idx] = ["jump", label, cond[n - 2], cond[n - 1]]
    return null
  }

  // True for the "_nop_N" markers written by nop_out
  var is_nop = function(item) {
    if (!is_text(item) || length(item) < 5) {
      return false
    }
    return text(item, 0, 5) == "_nop_"
  }

  // True only when a slot recorded as `known` can never pass a check for
  // `checked`. A slot recorded as T_NUM may still hold an int or a float,
  // so it does not rule out those narrower checks.
  var rules_out = function(known, checked) {
    if (known == null || known == T_UNKNOWN || known == checked) {
      return false
    }
    if (checked == T_NUM && (known == T_INT || known == T_FLOAT)) {
      return false
    }
    if (known == T_NUM && (checked == T_INT || checked == T_FLOAT)) {
      return false
    }
    return true
  }

  if (instructions == null || length(instructions) == 0) {
    return null
  }
  num_instr = length(instructions)
  slot_types = {}

  // Peephole optimization pass: type tracking + strength reduction
  i = 0
  while (i < num_instr) {
    instr = instructions[i]
    // Labels are join points: clear all type info (conservative)
    if (is_text(instr)) {
      slot_types = {}
      i = i + 1
      continue
    }
    if (!is_array(instr)) {
      i = i + 1
      continue
    }
    op = instr[0]

    // --- Peephole: type-check + jump where we know the type ---
    if (type_check_map[op] != null && i + 1 < num_instr) {
      dest = instr[1]
      src = instr[2]
      checked_type = type_check_map[op]
      next = instructions[i + 1]
      // Only a jump that tests this check's own result is paired with it
      next_op = null
      if (is_array(next) && next[1] == dest) {
        next_op = next[0]
      }
      if (next_op == "jump_false" || next_op == "jump_true") {
        target_label = next[2]
        is_match = slot_is(slot_types, src, checked_type)
        is_mismatch = rules_out(slot_types[text(src)], checked_type)
        if (next_op == "jump_false") {
          if (is_match) {
            // Check always true, never jumps: drop both
            nop_out(i)
            nop_out(i + 1)
            slot_types[text(dest)] = T_BOOL
          } else if (is_mismatch) {
            // Check always false, always jumps
            nop_out(i)
            force_jump(i + 1, target_label)
            slot_types[text(dest)] = T_UNKNOWN
          } else {
            // Undecided: keep both, but on fallthrough the check passed
            slot_types[text(dest)] = T_BOOL
            slot_types[text(src)] = checked_type
          }
        } else {
          if (is_match) {
            // Check always true, always jumps
            nop_out(i)
            force_jump(i + 1, target_label)
          } else if (is_mismatch) {
            // Check always false, never jumps: drop both
            nop_out(i)
            nop_out(i + 1)
          }
          slot_types[text(dest)] = T_BOOL
        }
        i = i + 2
        continue
      }
      // Standalone type check (no paired jump): just track the result
      slot_types[text(dest)] = T_BOOL
      i = i + 1
      continue
    }

    // --- Strength reduction: load_dynamic / store_dynamic ---
    if (op == "load_dynamic") {
      if (slot_is(slot_types, instr[3], T_TEXT)) {
        instr[0] = "load_field"
      } else if (slot_is(slot_types, instr[3], T_INT)) {
        instr[0] = "load_index"
      }
      slot_types[text(instr[1])] = T_UNKNOWN
      i = i + 1
      continue
    }
    if (op == "store_dynamic") {
      if (slot_is(slot_types, instr[3], T_TEXT)) {
        instr[0] = "store_field"
      } else if (slot_is(slot_types, instr[3], T_INT)) {
        instr[0] = "store_index"
      }
      i = i + 1
      continue
    }

    // --- Standard type tracking ---
    track_types(slot_types, instr)
    i = i + 1
  }

  // Second pass: remove dead jumps (jump to the immediately next label)
  i = 0
  while (i < num_instr) {
    instr = instructions[i]
    if (is_array(instr) && instr[0] == "jump") {
      target_label = instr[1]
      // Look at the next non-nop item. Nop markers are text too, so they
      // have to be skipped explicitly before the label test.
      j = i + 1
      while (j < num_instr) {
        peek = instructions[j]
        if (is_nop(peek)) {
          j = j + 1
          continue
        }
        if (is_text(peek)) {
          if (peek == target_label) {
            nop_out(i)
          }
          break
        }
        if (is_array(peek)) {
          break
        }
        j = j + 1
      }
    }
    i = i + 1
  }
  return null
}
// Run the optimizer over the main function first (when present), then
// over every sub-function in index order.
var fi = 0
var sub_count = 0
if (ir.main != null) {
  optimize_function(ir.main)
}
if (ir.functions != null) {
  sub_count = length(ir.functions)
}
while (fi < sub_count) {
  optimize_function(ir.functions[fi])
  fi = fi + 1
}
return ir
}
return streamline

BIN
streamline.mach Normal file

Binary file not shown.

View File

@@ -1,4 +1,5 @@
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var filename = args[0]
var src = text(fd.slurp(filename))