Merge branch 'mcode_streamline' into runtime_rework

This commit is contained in:
2026-02-13 15:42:20 -06:00
31 changed files with 113406 additions and 121977 deletions

View File

@@ -113,6 +113,19 @@ var v = a[] // pop: v is 3, a is [1, 2]
- `packages/` — core packages
- `Makefile` — build system (`make` to rebuild, `make bootstrap` for first build)
## Testing
After any C runtime changes, run all three test suites before considering the work done:
```
make # rebuild
./cell --dev vm_suite # VM-level tests (641 tests)
./cell --dev test suite # language-level tests (493 tests)
./cell --dev fuzz # fuzzer (100 iterations)
```
All three must pass with 0 failures.
## Documentation
The `docs/` folder is the single source of truth. The website at `website/` mounts it via Hugo. Key files:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

153
boot/seed_bootstrap.cm Normal file
View File

@@ -0,0 +1,153 @@
// seed_bootstrap.cm — minimal bootstrap for regenerating boot files
// Loads only the compiler pipeline, runs a script directly (no engine/actors)
// Usage: ./cell --dev --seed regen
//
// Hidden env: os, core_path, shop_path, args, json
var load_internal = os.load_internal
var fd = load_internal("js_fd_use")
var use_cache = {}
use_cache['fd'] = fd
use_cache['os'] = os
use_cache['json'] = json
function use_basic(path) {
if (use_cache[path])
return use_cache[path]
var result = load_internal("js_" + replace(path, '/', '_') + "_use")
if (result) {
use_cache[path] = result
return result
}
return null
}
// Load a module from boot .mcode — no caching, just eval
function boot_load(name) {
var mcode_path = core_path + '/boot/' + name + ".cm.mcode"
var mcode_json = null
if (!fd.is_file(mcode_path)) {
print("seed: missing boot mcode: " + mcode_path + "\n")
disrupt
}
mcode_json = text(fd.slurp(mcode_path))
return mach_eval_mcode(name, mcode_json, {use: use_basic})
}
var tokenize_mod = boot_load("tokenize")
var parse_mod = boot_load("parse")
var fold_mod = boot_load("fold")
var mcode_mod = boot_load("mcode")
var streamline_mod = boot_load("streamline")
use_cache['tokenize'] = tokenize_mod
use_cache['parse'] = parse_mod
use_cache['fold'] = fold_mod
use_cache['mcode'] = mcode_mod
use_cache['streamline'] = streamline_mod
function analyze(src, filename) {
var tok_result = tokenize_mod(src, filename)
var ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod)
var _i = 0
var e = null
var has_errors = ast.errors != null && length(ast.errors) > 0
if (has_errors) {
while (_i < length(ast.errors)) {
e = ast.errors[_i]
if (e.line != null) {
print(`${filename}:${text(e.line)}:${text(e.column)}: error: ${e.message}`)
} else {
print(`${filename}: error: ${e.message}`)
}
_i = _i + 1
}
disrupt
}
return fold_mod(ast)
}
function run_ast(name, ast, env) {
var compiled = mcode_mod(ast)
var optimized = streamline_mod(compiled)
var mcode_json = json.encode(optimized)
return mach_eval_mcode(name, mcode_json, env)
}
function use_fn(path) {
var result = null
var file_path = null
var script = null
var ast = null
var mcode_path = null
var mcode_json = null
if (use_cache[path])
return use_cache[path]
// Try C embed
result = load_internal("js_" + replace(path, '/', '_') + "_use")
if (result) {
use_cache[path] = result
return result
}
// Try boot mcode
mcode_path = core_path + '/boot/' + path + '.cm.mcode'
if (fd.is_file(mcode_path)) {
mcode_json = text(fd.slurp(mcode_path))
result = mach_eval_mcode(path, mcode_json, {use: use_fn})
use_cache[path] = result
return result
}
// Try .cm source (CWD then core)
file_path = path + '.cm'
if (!fd.is_file(file_path))
file_path = core_path + '/' + path + '.cm'
if (fd.is_file(file_path)) {
script = text(fd.slurp(file_path))
ast = analyze(script, file_path)
result = run_ast(path, ast, {use: use_fn})
use_cache[path] = result
return result
}
print("seed: module not found: " + path + "\n")
disrupt
}
// Run the program from args
var program = args[0]
var user_args = []
var _j = 1
var prog_path = null
var script = null
var ast = null
if (!program) {
print("seed: no program specified\n")
disrupt
}
while (_j < length(args)) {
push(user_args, args[_j])
_j = _j + 1
}
prog_path = program + '.ce'
if (!fd.is_file(prog_path))
prog_path = core_path + '/' + program + '.ce'
if (!fd.is_file(prog_path)) {
prog_path = program + '.cm'
if (!fd.is_file(prog_path))
prog_path = core_path + '/' + program + '.cm'
}
if (!fd.is_file(prog_path)) {
print("seed: program not found: " + program + "\n")
disrupt
}
script = text(fd.slurp(prog_path))
ast = analyze(script, prog_path)
run_ast(program, ast, {use: use_fn, args: user_args})

1344
boot/seed_bootstrap.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,42 +0,0 @@
// bootstrap.ce — regenerate .mach bytecode files consumed by the mach engine
// usage: cell bootstrap.ce
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var files = [
{src: "tokenize.cm", name: "tokenize", out: "tokenize.mach"},
{src: "parse.cm", name: "parse", out: "parse.mach"},
{src: "fold.cm", name: "fold", out: "fold.mach"},
{src: "mcode.cm", name: "mcode", out: "mcode.mach"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"},
{src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"}
]
var i = 0
var entry = null
var src = null
var tok_result = null
var ast = null
var folded = null
var ast_json = null
var bytecode = null
var f = null
while (i < length(files)) {
entry = files[i]
src = text(fd.slurp(entry.src))
tok_result = tokenize(src, entry.src)
ast = parse(tok_result.tokens, src, entry.src, tokenize)
folded = fold(ast)
ast_json = json.encode(folded)
bytecode = mach_compile_ast(entry.name, ast_json)
f = fd.open(entry.out, "w")
fd.write(f, bytecode)
fd.close(f)
print(`wrote ${entry.out}`)
i = i + 1
}

View File

@@ -27,7 +27,8 @@ Splits source text into tokens. Handles string interpolation by re-tokenizing te
Converts tokens into an AST. Also performs semantic analysis:
- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`.
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`, `"null"`.
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`. For `def` variables, type tags are also inferred from usage patterns: push (`x[] = v`) implies array, property access (`x.foo = v`) implies record, integer key implies array, text key implies record.
- **Type error detection**: For `def` variables with known type tags, provably wrong operations are reported as compile errors: property access on arrays, push on non-arrays, text keys on arrays, integer keys on records. Only `def` variables are checked because `var` can be reassigned.
- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`.
- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for numeric subscripts, `"field"` for string subscripts, omitted otherwise.
- **Tail position**: Return statements where the expression is a call get `tail: true`.
@@ -40,8 +41,8 @@ Operates on the AST. Performs constant folding and type analysis:
- **Constant propagation**: Tracks `def` bindings whose values are known constants.
- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites.
- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where x is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`.
- **Purity marking**: Stamps `pure: true` on expressions with no side effects (literals, name references, arithmetic on pure operands).
- **Dead code elimination**: Removes unreachable branches when conditions are known constants.
- **Purity analysis**: Expressions with no side effects are marked pure (literals, name references, arithmetic on pure operands, calls to pure intrinsics). The pure intrinsic set contains only `is_*` sensory functions — they are the only intrinsics guaranteed to never disrupt regardless of argument types. Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types and are excluded.
- **Dead code elimination**: Removes unreachable branches when conditions are known constants. Removes unused `var`/`def` declarations with pure initializers. Removes standalone calls to pure intrinsics where the result is discarded.
### Mcode (`mcode.cm`)
@@ -51,6 +52,8 @@ Lowers the AST to a JSON-based intermediate representation with explicit operati
- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call).
- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup.
- **Intrinsic inlining**: Type-check intrinsics (`is_array`, `is_text`, `is_number`, `is_integer`, `is_logical`, `is_null`, `is_function`, `is_object`, `is_stone`), `length`, and `push` are emitted as direct opcodes instead of frame/setarg/invoke call sequences.
- **Disruption handler labels**: When a function has a disruption handler, a label is emitted before the handler code. This allows the streamline optimizer's unreachable code elimination to safely nop dead code after `return` without accidentally eliminating the handler.
- **Tail call marking**: When a return statement's expression is a call and the function has no disruption handler, the final `invoke` is renamed to `tail_invoke`. This marks the call site for future tail call optimization. Functions with disruption handlers cannot use TCO because the handler frame must remain on the stack.
See [Mcode IR](mcode.md) for the instruction format and complete instruction reference.
@@ -58,12 +61,13 @@ See [Mcode IR](mcode.md) for the instruction format and complete instruction ref
Optimizes the Mcode IR through a series of independent passes. Operates per-function:
1. **Backward type inference**: Infers parameter types from how they are used in typed operators. Immutable `def` parameters keep their inferred type across label join points.
1. **Backward type inference**: Infers parameter types from how they are used in typed operators (`add_int`, `store_index`, `load_field`, `push`, `pop`, etc.). Immutable `def` parameters keep their inferred type across label join points.
2. **Type-check elimination**: When a slot's type is known, eliminates `is_<type>` + conditional jump pairs. Narrows `load_dynamic`/`store_dynamic` to typed variants.
3. **Algebraic simplification**: Rewrites identity operations (add 0, multiply 1, divide 1) and folds same-slot comparisons.
4. **Boolean simplification**: Fuses `not` + conditional jump into a single jump with inverted condition.
5. **Move elimination**: Removes self-moves (`move a, a`).
6. **Dead jump elimination**: Removes jumps to the immediately following label.
6. **Unreachable elimination**: Nops dead code after `return` until the next label.
7. **Dead jump elimination**: Removes jumps to the immediately following label.
See [Streamline Optimizer](streamline.md) for detailed pass descriptions.

View File

@@ -37,7 +37,7 @@ Subsumption: `int` and `float` both satisfy a `num` check.
### 1. infer_param_types (backward type inference)
Scans all typed operators to determine what types their operands must be. For example, `add_int dest, a, b` implies both `a` and `b` are integers.
Scans typed operators and generic arithmetic to determine what types their operands must be. For example, `subtract dest, a, b` implies both `a` and `b` are numbers.
When a parameter slot (1..nr_args) is consistently inferred as a single type, that type is recorded. Since parameters are immutable (`def`), the inferred type holds for the entire function and persists across label join points (loop headers, branch targets).
@@ -45,20 +45,67 @@ Backward inference rules:
| Operator class | Operand type inferred |
|---|---|
| `add_int`, `sub_int`, `mul_int`, `div_int`, `mod_int`, `eq_int`, comparisons, bitwise | T_INT |
| `add_float`, `sub_float`, `mul_float`, `div_float`, `mod_float`, float comparisons | T_FLOAT |
| `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate` | T_NUM |
| `eq_int`, `ne_int`, `lt_int`, `gt_int`, `le_int`, `ge_int`, bitwise ops | T_INT |
| `eq_float`, `ne_float`, `lt_float`, `gt_float`, `le_float`, `ge_float` | T_FLOAT |
| `concat`, text comparisons | T_TEXT |
| `eq_bool`, `ne_bool`, `not`, `and`, `or` | T_BOOL |
| `store_index` (object operand) | T_ARRAY |
| `store_index` (index operand) | T_INT |
| `store_field` (object operand) | T_RECORD |
| `push` (array operand) | T_ARRAY |
| `load_index` (object operand) | T_ARRAY |
| `load_index` (index operand) | T_INT |
| `load_field` (object operand) | T_RECORD |
| `pop` (array operand) | T_ARRAY |
When a slot appears with conflicting type inferences (e.g., used in both `add_int` and `concat` across different type-dispatch branches), the result is `unknown`. INT + FLOAT conflicts produce `num`.
Note: `add` is excluded from backward inference because it is polymorphic — it handles both numeric addition and text concatenation. Only operators that are unambiguously numeric can infer T_NUM.
When a slot appears with conflicting type inferences, the result is `unknown`. INT + FLOAT conflicts produce `num`.
**Nop prefix:** none (analysis only, does not modify instructions)
### 2. eliminate_type_checks (type-check + jump elimination)
### 2. infer_slot_write_types (slot write-type invariance)
Scans all instructions to determine which non-parameter slots have a consistent write type. If every instruction that writes to a given slot produces the same type, that type is globally invariant and can safely persist across label join points.
This analysis is sound because:
- `alloc_slot()` in mcode.cm is monotonically increasing — temp slots are never reused
- All local variable declarations must be at function body level and initialized — slots are written before any backward jumps to loop headers
- `move` is conservatively treated as T_UNKNOWN, avoiding unsound transitive assumptions
Write type mapping:
| Instruction class | Write type |
|---|---|
| `int` | T_INT |
| `true`, `false` | T_BOOL |
| `null` | T_NULL |
| `access` | type of literal value |
| `array` | T_ARRAY |
| `record` | T_RECORD |
| `function` | T_FUNCTION |
| `length` | T_INT |
| bitwise ops | T_INT |
| `concat` | T_TEXT |
| bool ops, comparisons, `in` | T_BOOL |
| generic arithmetic (`add`, `subtract`, `negate`, etc.) | T_UNKNOWN |
| `move`, `load_field`, `load_index`, `load_dynamic`, `pop`, `get` | T_UNKNOWN |
| `invoke`, `tail_invoke` | T_UNKNOWN |
The result is a map of slot→type for slots where all writes agree on a single known type. Parameter slots (1..nr_args) and slot 0 are excluded.
Common patterns this enables:
- **Length variables** (`var len = length(arr)`): written by `length` (T_INT) only → invariant T_INT
- **Boolean flags** (`var found = false; ... found = true`): written by `false` and `true` → invariant T_BOOL
- **Locally-created containers** (`var arr = []`): written by `array` only → invariant T_ARRAY
Note: Loop counters (`var i = 0; i = i + 1`) are NOT invariant because `add` produces T_UNKNOWN. However, if `i` is a function parameter used in arithmetic, backward inference from `subtract`/`multiply`/etc. will infer T_NUM for it, which persists across labels.
**Nop prefix:** none (analysis only, does not modify instructions)
### 3. eliminate_type_checks (type-check + jump elimination)
Forward pass that tracks the known type of each slot. When a type check (`is_int`, `is_text`, `is_num`, etc.) is followed by a conditional jump, and the slot's type is already known, the check and jump can be eliminated or converted to an unconditional jump.
@@ -70,30 +117,13 @@ Three cases:
This pass also reduces `load_dynamic`/`store_dynamic` to `load_field`/`store_field` or `load_index`/`store_index` when the key slot's type is known.
At label join points, all type information is reset except for parameter types seeded by the backward inference pass.
At label join points, all type information is reset except for parameter types from backward inference and write-invariant types from slot write-type analysis.
**Nop prefix:** `_nop_tc_`
### 3. simplify_algebra (algebraic identity + comparison folding)
### 4. simplify_algebra (same-slot comparison folding)
Tracks known constant values alongside types. Rewrites identity operations:
| Pattern | Rewrite |
|---------|---------|
| `add_int dest, x, 0` | `move dest, x` |
| `add_int dest, 0, x` | `move dest, x` |
| `sub_int dest, x, 0` | `move dest, x` |
| `mul_int dest, x, 1` | `move dest, x` |
| `mul_int dest, 1, x` | `move dest, x` |
| `mul_int dest, x, 0` | `int dest, 0` |
| `div_int dest, x, 1` | `move dest, x` |
| `add_float dest, x, 0` | `move dest, x` |
| `mul_float dest, x, 1` | `move dest, x` |
| `div_float dest, x, 1` | `move dest, x` |
Float multiplication by zero is intentionally not optimized because it is not safe with NaN and Inf values.
Same-slot comparison folding:
Tracks known constant values. Folds same-slot comparisons:
| Pattern | Rewrite |
|---------|---------|
@@ -107,7 +137,7 @@ Same-slot comparison folding:
**Nop prefix:** none (rewrites in place, does not create nops)
### 4. simplify_booleans (not + jump fusion)
### 5. simplify_booleans (not + jump fusion)
Peephole pass that eliminates unnecessary `not` instructions:
@@ -121,21 +151,21 @@ This is particularly effective on `if (!cond)` patterns, which the compiler gene
**Nop prefix:** `_nop_bl_`
### 5. eliminate_moves (self-move elimination)
### 6. eliminate_moves (self-move elimination)
Removes `move a, a` instructions where the source and destination are the same slot. These can arise from earlier passes rewriting binary operations into moves.
**Nop prefix:** `_nop_mv_`
### 6. eliminate_unreachable (dead code after return/disrupt)
### 7. eliminate_unreachable (dead code after return)
*Currently disabled.* Nops instructions after `return` or `disrupt` until the next real label.
Nops instructions after `return` until the next real label. Only `return` is treated as a terminal instruction; `disrupt` is not, because the disruption handler code immediately follows `disrupt` and must remain reachable.
Disabled because disruption handler code is placed after the `return`/`disrupt` instruction without a label boundary. The VM dispatches to handlers via the `disruption_pc` offset, not through normal control flow. Re-enabling this pass requires the mcode compiler to emit labels at disruption handler entry points.
The mcode compiler emits a label at disruption handler entry points (see `emit_label(gen_label("disruption"))` in mcode.cm), which provides the label boundary that stops this pass from eliminating handler code.
**Nop prefix:** `_nop_ur_`
### 7. eliminate_dead_jumps (jump-to-next-label elimination)
### 8. eliminate_dead_jumps (jump-to-next-label elimination)
Removes `jump L` instructions where `L` is the immediately following label (skipping over any intervening nop strings). These are common after other passes eliminate conditional branches, leaving behind jumps that fall through naturally.
@@ -146,12 +176,13 @@ Removes `jump L` instructions where `L` is the immediately following label (skip
All passes run in sequence in `optimize_function`:
```
infer_param_types → returns param_types map
eliminate_type_checks → uses param_types
infer_param_types → returns param_types map
infer_slot_write_types → returns write_types map
eliminate_type_checks → uses param_types + write_types
simplify_algebra
simplify_booleans
eliminate_moves
(eliminate_unreachable) → disabled
eliminate_unreachable
eliminate_dead_jumps
```
@@ -177,6 +208,16 @@ Before streamlining, `mcode.cm` recognizes calls to built-in intrinsic functions
These inlined opcodes have corresponding Mach VM implementations in `mach.c`.
## Unified Arithmetic
Arithmetic operations use generic opcodes: `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate`. There are no type-dispatched variants (e.g., no `add_int`/`add_float`).
The Mach VM dispatches at runtime with an int-first fast path via `reg_vm_binop()`: it checks `JS_VALUE_IS_BOTH_INT` first for fast integer arithmetic, then falls back to float conversion, text concatenation (for `add` only), or type error.
Bitwise operations (`shl`, `shr`, `ushr`, `bitand`, `bitor`, `bitxor`, `bitnot`) remain integer-only and disrupt if operands are not integers.
The QBE/native backend maps generic arithmetic to helper calls (`qbe.add`, `qbe.sub`, etc.). The vision for the native path is that with sufficient type inference, the backend can unbox proven-numeric values to raw registers, operate directly, and only rebox at boundaries (returns, calls, stores).
## Debugging Tools
Three dump tools inspect the IR at different stages:
@@ -192,6 +233,124 @@ Usage:
./cell --core . dump_types.cm <file.ce|file.cm>
```
## Tail Call Marking
When a function's return expression is a call (`stmt.tail == true` from the parser) and the function has no disruption handler, mcode.cm renames the final `invoke` instruction to `tail_invoke`. This is semantically identical to `invoke` in the current Mach VM, but marks the call site for future tail call optimization.
The disruption handler restriction exists because TCO would discard the current frame, but the handler must remain on the stack to catch disruptions from the callee.
`tail_invoke` is handled by the same passes as `invoke` in streamline (type tracking, algebraic simplification) and executes identically in the VM.
## Type Propagation Architecture
Type information flows through three compilation stages, each building on the previous:
### Stage 1: Parse-time type tags (parse.cm)
The parser assigns `type_tag` strings to scope variable entries when the type is syntactically obvious:
- **From initializers**: `def a = []``type_tag: "array"`, `def n = 42``type_tag: "integer"`, `def r = {}``type_tag: "record"`
- **From usage patterns** (def only): `def x = null; x[] = v` infers `type_tag: "array"` from the push. `def x = null; x.foo = v` infers `type_tag: "record"` from property access.
- **Type error detection** (def only): When a `def` variable has a known type_tag, provably wrong operations are compile errors:
- Property access (`.`) on array
- Push (`[]`) on non-array
- Text key on array
- Integer key on record
Only `def` (constant) variables participate in type inference and error detection. `var` variables can be reassigned, making their initializer type unreliable.
### Stage 2: Fold-time type propagation (fold.cm)
The fold pass extends type information through the AST:
- **Intrinsic folding**: `is_array(known_array)` folds to `true`. `length(known_array)` gets `hint: "array_length"`.
- **Purity analysis**: Expressions involving only `is_*` intrinsic calls with pure arguments are considered pure. This enables dead code elimination for unused `var`/`def` bindings with pure initializers, and elimination of standalone pure call statements.
- **Dead code**: Unused pure `var`/`def` declarations are removed. Standalone calls to pure intrinsics (where the result is discarded) are removed. Unreachable branches with constant conditions are removed.
The `pure_intrinsics` set currently contains only `is_*` sensory functions (`is_array`, `is_text`, `is_number`, `is_integer`, `is_function`, `is_logical`, `is_null`, `is_object`, `is_stone`). Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types, so they are excluded — removing a call that would disrupt changes observable behavior.
### Stage 3: Streamline-time type tracking (streamline.cm)
The streamline optimizer uses a numeric type lattice (`T_INT`, `T_FLOAT`, `T_TEXT`, etc.) for fine-grained per-instruction tracking:
- **Backward inference** (pass 1): Scans typed operators to infer parameter types. Since parameters are `def` (immutable), inferred types persist across label boundaries.
- **Write-type invariance** (pass 2): Scans all instructions to find local slots where every write produces the same type. These invariant types persist across label boundaries alongside parameter types.
- **Forward tracking** (pass 3): `track_types` follows instruction execution order, tracking the type of each slot. Known-type operations set their destination type (e.g., `concat` → T_TEXT, `length` → T_INT). Generic arithmetic produces T_UNKNOWN. Type checks on unknown slots narrow the type on fallthrough.
- **Type check elimination** (pass 3): When a slot's type is already known, `is_<type>` + conditional jump pairs are eliminated or converted to unconditional jumps.
- **Dynamic access narrowing** (pass 3): `load_dynamic`/`store_dynamic` are narrowed to `load_field`/`store_field` or `load_index`/`store_index` when the key type is known.
Type information resets at label join points (since control flow merges could bring different types), except for parameter types from backward inference and write-invariant types from slot write-type analysis.
## Future Work
### Copy Propagation
A basic-block-local copy propagation pass would replace uses of a copied variable with its source, enabling further move elimination. An implementation was attempted but encountered an unsolved bug where 2-position instruction operand replacement produces incorrect code during self-hosting (the replacement logic for 3-position instructions works correctly). The root cause is not yet understood. See the project memory files for detailed notes.
### Expanded Purity Analysis
The current purity set is conservative (only `is_*`). It could be expanded by:
- **Argument-type-aware purity**: If all arguments to an intrinsic are known to be the correct types (via type_tag or slot_types), the call cannot disrupt and is safe to eliminate. For example, `length(known_array)` is pure but `length(unknown)` is not.
- **User function purity**: Analyze user-defined function bodies during pre_scan. A function is pure if its body contains only pure expressions and calls to known-pure functions. This requires fixpoint iteration for mutual recursion.
- **Callback-aware purity**: Intrinsics like `filter`, `find`, `reduce`, `some`, `every` are pure if their callback argument is pure.
### Forward Type Narrowing from Typed Operations
With unified arithmetic (generic `add`/`subtract`/`multiply`/`divide`/`modulo`/`negate` instead of typed variants), this approach is no longer applicable. Typed comparisons (`eq_int`, `lt_float`, etc.) still exist and their operands have known types, but these are already handled by backward inference.
### Guard Hoisting for Parameters
When a type check on a parameter passes (falls through), the parameter's type could be promoted to `param_types` so it persists across label boundaries. This would allow the first type check on a parameter to prove its type for the entire function. However, this is unsound for polymorphic parameters — if a function is called with different argument types, the first check would wrongly eliminate checks for subsequent types.
A safe version would require proving that a parameter is monomorphic (called with only one type across all call sites), which requires interprocedural analysis.
**Note:** For local variables (non-parameters), the write-type invariance analysis (pass 2) achieves a similar effect safely — if every write to a slot produces the same type, that type persists across labels without needing to hoist any guard.
### Tail Call Optimization
`tail_invoke` instructions are currently marked but execute identically to `invoke`. Actual TCO would reuse the current call frame instead of creating a new one. This requires:
- Ensuring argument count matches (or the frame can be resized)
- No live locals needed after the call (guaranteed by tail position)
- No disruption handler on the current function (already enforced by the marking)
- VM support in mach.c to rewrite the frame in place
### Interprocedural Type Inference
Currently all type inference is intraprocedural (within a single function). Cross-function analysis could:
- Infer return types from function bodies
- Propagate argument types from call sites to callees
- Specialize functions for known argument types (cloning)
### Strength Reduction
Common patterns that could be lowered to cheaper operations when operand types are known:
- `multiply x, 2` with proven-int operands → shift left
- `divide x, 2` with proven-int → arithmetic shift right
- `modulo x, power_of_2` with proven-int → bitwise and
### Numeric Unboxing (QBE/native path)
With unified arithmetic and backward type inference, the native backend can identify regions where numeric values remain in registers without boxing/unboxing:
1. **Guard once**: When backward inference proves a parameter is T_NUM, emit a single type guard at function entry.
2. **Unbox**: Convert the tagged JSValue to a raw double register.
3. **Operate**: Use native FP/int instructions directly (no function calls, no tag checks).
4. **Rebox**: Convert back to tagged JSValue only at rebox points (function returns, calls, stores to arrays/records).
This requires inserting `unbox`/`rebox` IR annotations (no-ops in the Mach VM, meaningful only to QBE).
### Loop-Invariant Code Motion
Type checks that are invariant across loop iterations (checking a variable that doesn't change in the loop body) could be hoisted above the loop. This would require identifying loop boundaries and proving invariance.
### Algebraic Identity Optimization
With unified arithmetic, algebraic identities (x+0→x, x*1→x, x*0→0, x/1→x) require knowing operand values at compile time. Since generic `add`/`multiply` operate on any numeric type, the constant-tracking logic in `simplify_algebra` could be extended to handle these for known-constant slots.
## Nop Convention
Eliminated instructions are replaced with strings matching `_nop_<prefix>_<counter>`. The prefix identifies which pass created the nop. Nop strings are:

View File

@@ -36,15 +36,9 @@ def T_RECORD = "record"
def T_FUNCTION = "function"
def int_result_ops = {
add_int: true, sub_int: true, mul_int: true,
div_int: true, mod_int: true, neg_int: true,
bitnot: true, bitand: true, bitor: true,
bitxor: true, shl: true, shr: true, ushr: true
}
def float_result_ops = {
add_float: true, sub_float: true, mul_float: true,
div_float: true, mod_float: true, neg_float: true
}
def bool_result_ops = {
eq_int: true, ne_int: true, lt_int: true, gt_int: true,
le_int: true, ge_int: true,
@@ -85,8 +79,6 @@ var track_types = function(slot_types, instr) {
slot_types[text(instr[1])] = src_type != null ? src_type : T_UNKNOWN
} else if (int_result_ops[op] == true) {
slot_types[text(instr[1])] = T_INT
} else if (float_result_ops[op] == true) {
slot_types[text(instr[1])] = T_FLOAT
} else if (op == "concat") {
slot_types[text(instr[1])] = T_TEXT
} else if (bool_result_ops[op] == true) {
@@ -99,7 +91,7 @@ var track_types = function(slot_types, instr) {
slot_types[text(instr[1])] = T_RECORD
} else if (op == "function") {
slot_types[text(instr[1])] = T_FUNCTION
} else if (op == "invoke") {
} else if (op == "invoke" || op == "tail_invoke") {
slot_types[text(instr[2])] = T_UNKNOWN
} else if (op == "load_field" || op == "load_index" || op == "load_dynamic") {
slot_types[text(instr[1])] = T_UNKNOWN
@@ -107,6 +99,9 @@ var track_types = function(slot_types, instr) {
slot_types[text(instr[1])] = T_UNKNOWN
} else if (op == "length") {
slot_types[text(instr[1])] = T_INT
} else if (op == "add" || op == "subtract" || op == "multiply" ||
op == "divide" || op == "modulo" || op == "pow" || op == "negate") {
slot_types[text(instr[1])] = T_UNKNOWN
}
return null
}

2
fd.cm
View File

@@ -97,4 +97,4 @@ fd.globfs = function(globs, dir) {
return results
}
return fd
return fd

23
fold.cm
View File

@@ -15,10 +15,18 @@ var fold = function(ast) {
return k == "number" || k == "text" || k == "true" || k == "false" || k == "null"
}
// Only intrinsics that can NEVER disrupt regardless of argument types
var pure_intrinsics = {
is_array: true, is_text: true, is_number: true, is_integer: true,
is_function: true, is_logical: true, is_null: true, is_object: true,
is_stone: true
}
var is_pure = function(expr) {
if (expr == null) return true
var k = expr.kind
var i = 0
var target = null
if (k == "number" || k == "text" || k == "true" || k == "false" ||
k == "null" || k == "name" || k == "this") return true
if (k == "function") return true
@@ -47,6 +55,17 @@ var fold = function(ast) {
if (k == "==" || k == "!=" || k == "&&" || k == "||") {
return is_pure(expr.left) && is_pure(expr.right)
}
if (k == "(") {
target = expr.expression
if (target != null && target.intrinsic == true && pure_intrinsics[target.name] == true) {
i = 0
while (i < length(expr.list)) {
if (!is_pure(expr.list[i])) return false
i = i + 1
}
return true
}
}
return false
}
@@ -676,6 +695,10 @@ var fold = function(ast) {
}
}
}
// Dead pure call elimination: standalone pure calls with no result
if (stmt.kind == "call" && is_pure(stmt.expression)) {
stmt.dead = true
}
// Dead function elimination
if (stmt.kind == "function" && stmt.name != null) {
sv = scope_var(fn_nr, stmt.name)

230
mcode.cm
View File

@@ -52,6 +52,7 @@ var mcode = function(ast) {
var s_cur_line = 0
var s_cur_col = 0
var s_filename = null
var s_has_disruption = false
// Shared closure vars for binop helpers (avoids >4 param functions)
var _bp_dest = 0
@@ -78,7 +79,8 @@ var mcode = function(ast) {
function_nr: s_function_nr,
intrinsic_cache: s_intrinsic_cache,
cur_line: s_cur_line,
cur_col: s_cur_col
cur_col: s_cur_col,
has_disruption: s_has_disruption
}
}
@@ -99,6 +101,7 @@ var mcode = function(ast) {
s_intrinsic_cache = saved.intrinsic_cache
s_cur_line = saved.cur_line
s_cur_col = saved.cur_col
s_has_disruption = saved.has_disruption
}
// Slot allocation
@@ -270,157 +273,19 @@ var mcode = function(ast) {
return node.kind == "null"
}
// emit_add_decomposed: int path -> text path -> float path -> disrupt
// emit_add_decomposed: emit generic add (VM dispatches int/float/text)
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_add_decomposed = function() {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(_bp_ln)
var left_is_text = is_known_text(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_text = is_known_text(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var not_int = null
var not_text = null
var done = null
var err = null
// Both sides known int
if (left_is_int && right_is_int) {
emit_3("add_int", dest, left, right)
// Known text+text → concat directly (skip numeric check in VM)
if (is_known_text(_bp_ln) && is_known_text(_bp_rn)) {
emit_3("concat", _bp_dest, _bp_left, _bp_right)
return null
}
// Both sides known text
if (left_is_text && right_is_text) {
emit_3("concat", dest, left, right)
return null
}
// Both sides known number (but not both int)
if (left_is_num && right_is_num) {
if (left_is_int && right_is_int) {
emit_3("add_int", dest, left, right)
} else {
emit_3("add_float", dest, left, right)
}
return null
}
not_int = gen_label("add_ni")
not_text = gen_label("add_nt")
done = gen_label("add_done")
err = gen_label("add_err")
// Int path
t0 = alloc_slot()
if (!left_is_int) {
emit_2("is_int", t0, left)
emit_jump_cond("jump_false", t0, not_int)
}
t1 = alloc_slot()
if (!right_is_int) {
emit_2("is_int", t1, right)
emit_jump_cond("jump_false", t1, not_int)
}
emit_3("add_int", dest, left, right)
emit_jump(done)
// Text path
emit_label(not_int)
if (!left_is_text) {
emit_2("is_text", t0, left)
emit_jump_cond("jump_false", t0, not_text)
}
if (!right_is_text) {
emit_2("is_text", t1, right)
emit_jump_cond("jump_false", t1, not_text)
}
emit_3("concat", dest, left, right)
emit_jump(done)
// Float path
emit_label(not_text)
if (!left_is_num) {
emit_2("is_num", t0, left)
emit_jump_cond("jump_false", t0, err)
}
if (!right_is_num) {
emit_2("is_num", t1, right)
emit_jump_cond("jump_false", t1, err)
}
emit_3("add_float", dest, left, right)
emit_jump(done)
emit_label(err)
emit_0("disrupt")
emit_label(done)
emit_3("add", _bp_dest, _bp_left, _bp_right)
return null
}
// emit_numeric_binop: int path -> float path -> disrupt
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_numeric_binop = function(int_op, float_op) {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var not_int = null
var done = null
var err = null
// Both sides known int
if (left_is_int && right_is_int) {
emit_3(int_op, dest, left, right)
return null
}
// Both sides known number (but not both int)
if (left_is_num && right_is_num) {
emit_3(float_op, dest, left, right)
return null
}
not_int = gen_label("num_ni")
done = gen_label("num_done")
err = gen_label("num_err")
t0 = alloc_slot()
if (!left_is_int) {
emit_2("is_int", t0, left)
emit_jump_cond("jump_false", t0, not_int)
}
t1 = alloc_slot()
if (!right_is_int) {
emit_2("is_int", t1, right)
emit_jump_cond("jump_false", t1, not_int)
}
emit_3(int_op, dest, left, right)
emit_jump(done)
emit_label(not_int)
if (!left_is_num) {
emit_2("is_num", t0, left)
emit_jump_cond("jump_false", t0, err)
}
if (!right_is_num) {
emit_2("is_num", t1, right)
emit_jump_cond("jump_false", t1, err)
}
emit_3(float_op, dest, left, right)
emit_jump(done)
emit_label(err)
emit_0("disrupt")
emit_label(done)
return null
}
// emit_numeric_binop removed — generic ops emitted directly via passthrough
// emit_eq_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(false)
// reads _bp_dest, _bp_left, _bp_right from closure
@@ -646,41 +511,9 @@ var mcode = function(ast) {
return null
}
// emit_neg_decomposed: int path -> float path -> disrupt
// emit_neg_decomposed: emit generic negate (VM dispatches int/float)
var emit_neg_decomposed = function(dest, src, src_node) {
var t0 = 0
var not_int = null
var done = null
var err = null
if (is_known_int(src_node)) {
emit_2("neg_int", dest, src)
return null
}
if (is_known_number(src_node)) {
emit_2("neg_float", dest, src)
return null
}
not_int = gen_label("neg_ni")
done = gen_label("neg_done")
err = gen_label("neg_err")
t0 = alloc_slot()
emit_2("is_int", t0, src)
emit_jump_cond("jump_false", t0, not_int)
emit_2("neg_int", dest, src)
emit_jump(done)
emit_label(not_int)
emit_2("is_num", t0, src)
emit_jump_cond("jump_false", t0, err)
emit_2("neg_float", dest, src)
emit_jump(done)
emit_label(err)
emit_0("disrupt")
emit_label(done)
emit_2("negate", dest, src)
return null
}
@@ -692,14 +525,6 @@ var mcode = function(ast) {
_bp_right = right
if (op_str == "add") {
emit_add_decomposed()
} else if (op_str == "subtract") {
emit_numeric_binop("sub_int", "sub_float")
} else if (op_str == "multiply") {
emit_numeric_binop("mul_int", "mul_float")
} else if (op_str == "divide") {
emit_numeric_binop("div_int", "div_float")
} else if (op_str == "modulo") {
emit_numeric_binop("mod_int", "mod_float")
} else if (op_str == "eq") {
emit_eq_decomposed()
} else if (op_str == "ne") {
@@ -713,7 +538,8 @@ var mcode = function(ast) {
} else if (op_str == "ge") {
emit_relational("ge_int", "ge_float", "ge_text")
} else {
// Passthrough for bitwise, pow, in, etc.
// Passthrough for subtract, multiply, divide, modulo,
// bitwise, pow, in, etc.
emit_3(op_str, dest, left, right)
}
return null
@@ -1024,9 +850,9 @@ var mcode = function(ast) {
emit_3("setarg", f, 2, item)
emit_2("invoke", f, acc)
if (forward) {
emit_3("add_int", i, i, one)
emit_3("add", i, i, one)
} else {
emit_3("sub_int", i, i, one)
emit_3("subtract", i, i, one)
}
emit_jump(loop_label)
}
@@ -1056,7 +882,7 @@ var mcode = function(ast) {
emit_3("setarg", f, 1, item)
emit_3("setarg", f, 2, i)
emit_2("invoke", f, discard)
emit_3("add_int", i, i, one)
emit_3("add", i, i, one)
emit_jump(loop_label)
emit_label(done_label)
emit_1("null", dest)
@@ -1090,7 +916,7 @@ var mcode = function(ast) {
emit_3("setarg", f, 1, item)
emit_2("invoke", f, val)
emit_jump_cond("jump_false", val, ret_false)
emit_3("add_int", i, i, one)
emit_3("add", i, i, one)
emit_jump(loop_label)
emit_label(ret_true)
emit_1("true", dest)
@@ -1128,7 +954,7 @@ var mcode = function(ast) {
emit_3("setarg", f, 1, item)
emit_2("invoke", f, val)
emit_jump_cond("jump_true", val, ret_true)
emit_3("add_int", i, i, one)
emit_3("add", i, i, one)
emit_jump(loop_label)
emit_label(ret_true)
emit_1("true", dest)
@@ -1170,7 +996,7 @@ var mcode = function(ast) {
emit_jump_cond("jump_false", val, skip_label)
emit_2("push", result, item)
emit_label(skip_label)
emit_3("add_int", i, i, one)
emit_3("add", i, i, one)
emit_jump(loop_label)
emit_label(done_label)
emit_2("move", dest, result)
@@ -1269,9 +1095,9 @@ var mcode = function(ast) {
emit_jump(final_label)
// No initial, reverse
emit_label(no_init_rev)
emit_3("sub_int", i, len, one)
emit_3("subtract", i, len, one)
emit_3("load_index", acc, arr_slot, i)
emit_3("sub_int", i, i, one)
emit_3("subtract", i, i, one)
emit_reduce_loop(r, false, d2)
emit_label(d2)
emit_2("move", dest, acc)
@@ -1292,7 +1118,7 @@ var mcode = function(ast) {
// Has initial, reverse
emit_label(init_rev)
emit_2("move", acc, init_slot)
emit_3("sub_int", i, len, one)
emit_3("subtract", i, len, one)
emit_reduce_loop(r, false, d4)
emit_label(d4)
emit_2("move", dest, acc)
@@ -2230,6 +2056,7 @@ var mcode = function(ast) {
var guard_t = 0
var guard_err = null
var guard_done = null
var last_instr = null
if (stmt == null) {
return null
@@ -2436,6 +2263,13 @@ var mcode = function(ast) {
expr = stmt.expression
if (expr != null) {
slot = gen_expr(expr, -1)
// Mark tail calls: rename last invoke to tail_invoke
if (stmt.tail == true && !s_has_disruption) {
last_instr = s_instructions[length(s_instructions) - 1]
if (is_array(last_instr) && last_instr[0] == "invoke") {
last_instr[0] = "tail_invoke"
}
}
emit_1("return", slot)
} else {
null_slot = alloc_slot()
@@ -2618,6 +2452,7 @@ var mcode = function(ast) {
s_label_map = {}
s_is_arrow = is_arrow
s_has_disruption = disrupt_clause != null && is_array(disrupt_clause)
s_function_nr = fn_nr_node != null ? fn_nr_node : 0
@@ -2726,6 +2561,7 @@ var mcode = function(ast) {
// Compile disruption clause
if (disrupt_clause != null && is_array(disrupt_clause)) {
emit_label(gen_label("disruption"))
disruption_start = length(s_instructions)
_i = 0
while (_i < length(disrupt_clause)) {

View File

@@ -1627,6 +1627,46 @@ var parse = function(tokens, src, filename, tokenizer) {
if (kind == "[" && left_node.right != null) {
sem_check_expr(scope, left_node.right)
}
// Type error detection for known-type constant objects
if (obj_expr != null && obj_expr.kind == "name" && obj_expr.name != null) {
v = sem_find_var(scope, obj_expr.name)
if (v != null && v.is_const && v.type_tag != null) {
if (kind == ".") {
if (v.type_tag == "array") {
sem_error(left_node, "cannot set property on array '" + obj_expr.name + "'")
}
} else if (kind == "[") {
if (left_node.right == null) {
// Push: a[] = val
if (v.type_tag != "array") {
sem_error(left_node, "push only works on arrays, not " + v.type_tag + " '" + obj_expr.name + "'")
}
} else if (v.type_tag == "array") {
if (left_node.right.kind == "text") {
sem_error(left_node, "cannot use text key on array '" + obj_expr.name + "'")
}
} else if (v.type_tag == "record") {
if (left_node.right.kind == "number" && is_integer(left_node.right.number)) {
sem_error(left_node, "cannot use integer key on record '" + obj_expr.name + "'; use text key")
}
}
}
} else if (v != null && v.is_const && v.type_tag == null) {
// Infer type_tag from usage pattern (def only)
if (kind == ".") {
v.type_tag = "record"
} else if (kind == "[") {
if (left_node.right == null) {
// Push: a[] = val → array
v.type_tag = "array"
} else if (left_node.right.kind == "number" && is_integer(left_node.right.number)) {
v.type_tag = "array"
} else if (left_node.right.kind == "text") {
v.type_tag = "record"
}
}
}
}
}
}
@@ -1878,7 +1918,7 @@ var parse = function(tokens, src, filename, tokenizer) {
sem_check_expr(scope, stmt.right)
if (name != null) {
tt = derive_type_tag(stmt.right)
if (tt != null) {
if (tt != null && tt != "null") {
existing = sem_find_var(scope, name)
if (existing != null) existing.type_tag = tt
}

175
qbe.cm
View File

@@ -732,167 +732,6 @@ var ushr = function(p, ctx, a, b) {
// These map directly to the new IR ops emitted by mcode.cm.
// ============================================================
// --- Arithmetic (int path) ---
// add_int: assume both operands are tagged ints. Overflow -> float.
var add_int = function(p, ctx, a, b) {
return ` %${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.sum =l add %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.sum, ${int32_min}
%${p}.hi =w csgtl %${p}.sum, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.ov, @${p}.ok
@${p}.ok
%${p}.rw =w copy %${p}.sum
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.ov
%${p}.fd =d sltof %${p}.sum
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
@${p}.done
`
}
var sub_int = function(p, ctx, a, b) {
return ` %${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.diff =l sub %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.diff, ${int32_min}
%${p}.hi =w csgtl %${p}.diff, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.ov, @${p}.ok
@${p}.ok
%${p}.rw =w copy %${p}.diff
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.ov
%${p}.fd =d sltof %${p}.diff
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
@${p}.done
`
}
var mul_int = function(p, ctx, a, b) {
return ` %${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.prod =l mul %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.prod, ${int32_min}
%${p}.hi =w csgtl %${p}.prod, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.ov, @${p}.ok
@${p}.ok
%${p}.rw =w copy %${p}.prod
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.ov
%${p}.fd =d sltof %${p}.prod
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
@${p}.done
`
}
var div_int = function(p, ctx, a, b) {
return ` %${p}.ia =w copy 0
%${p}.tmp =l sar ${a}, 1
%${p}.ia =w copy %${p}.tmp
%${p}.ib =w copy 0
%${p}.tmp2 =l sar ${b}, 1
%${p}.ib =w copy %${p}.tmp2
%${p}.div0 =w ceqw %${p}.ib, 0
jnz %${p}.div0, @${p}.null, @${p}.chk
@${p}.null
%${p} =l copy ${js_null}
jmp @${p}.done
@${p}.chk
%${p}.rem =w rem %${p}.ia, %${p}.ib
%${p}.exact =w ceqw %${p}.rem, 0
jnz %${p}.exact, @${p}.idiv, @${p}.fdiv
@${p}.idiv
%${p}.q =w div %${p}.ia, %${p}.ib
%${p}.qext =l extuw %${p}.q
%${p} =l shl %${p}.qext, 1
jmp @${p}.done
@${p}.fdiv
%${p}.da =d swtof %${p}.ia
%${p}.db =d swtof %${p}.ib
%${p}.dr =d div %${p}.da, %${p}.db
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.dr)
@${p}.done
`
}
var mod_int = function(p, ctx, a, b) {
return ` %${p}.ia =w copy 0
%${p}.tmp =l sar ${a}, 1
%${p}.ia =w copy %${p}.tmp
%${p}.ib =w copy 0
%${p}.tmp2 =l sar ${b}, 1
%${p}.ib =w copy %${p}.tmp2
%${p}.div0 =w ceqw %${p}.ib, 0
jnz %${p}.div0, @${p}.null, @${p}.do_mod
@${p}.null
%${p} =l copy ${js_null}
jmp @${p}.done
@${p}.do_mod
%${p}.r =w rem %${p}.ia, %${p}.ib
%${p}.rext =l extuw %${p}.r
%${p} =l shl %${p}.rext, 1
@${p}.done
`
}
var neg_int = function(p, ctx, v) {
return ` %${p}.sl =l sar ${v}, 1
%${p}.iw =w copy %${p}.sl
%${p}.is_min =w ceqw %${p}.iw, ${int32_min}
jnz %${p}.is_min, @${p}.ov, @${p}.ok
@${p}.ov
%${p}.fd =d swtof %${p}.iw
%${p}.fdn =d neg %${p}.fd
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fdn)
jmp @${p}.done
@${p}.ok
%${p}.ni =w sub 0, %${p}.iw
%${p}.niext =l extuw %${p}.ni
%${p} =l shl %${p}.niext, 1
@${p}.done
`
}
// --- Arithmetic (float path) ---
var add_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_add(l ${ctx}, l ${a}, l ${b})
`
}
var sub_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_sub(l ${ctx}, l ${a}, l ${b})
`
}
var mul_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_mul(l ${ctx}, l ${a}, l ${b})
`
}
var div_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_div(l ${ctx}, l ${a}, l ${b})
`
}
var mod_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_mod(l ${ctx}, l ${a}, l ${b})
`
}
var neg_float = function(p, ctx, v) {
return ` %${p} =l call $qbe_float_neg(l ${ctx}, l ${v})
`
}
// --- Text concat ---
var concat = function(p, ctx, a, b) {
return ` %${p} =l call $JS_ConcatString(l ${ctx}, l ${a}, l ${b})
@@ -1039,20 +878,6 @@ return {
shl: shl,
shr: shr,
ushr: ushr,
// decomposed arithmetic (int path)
add_int: add_int,
sub_int: sub_int,
mul_int: mul_int,
div_int: div_int,
mod_int: mod_int,
neg_int: neg_int,
// decomposed arithmetic (float path)
add_float: add_float,
sub_float: sub_float,
mul_float: mul_float,
div_float: div_float,
mod_float: mod_float,
neg_float: neg_float,
// text concat
concat: concat,
// decomposed comparisons (int)

View File

@@ -201,77 +201,46 @@ var qbe_emit = function(ir, qbe) {
continue
}
// --- Arithmetic (int path) — use qbe.cm macros ---
// --- Generic arithmetic (VM dispatches int/float) ---
if (op == "add_int") {
if (op == "add") {
p = fresh()
emit(qbe.add_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.add(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "sub_int") {
if (op == "subtract") {
p = fresh()
emit(qbe.sub_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.sub(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mul_int") {
if (op == "multiply") {
p = fresh()
emit(qbe.mul_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.mul(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "div_int") {
if (op == "divide") {
p = fresh()
emit(qbe.div_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.div(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mod_int") {
if (op == "modulo") {
p = fresh()
emit(qbe.mod_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.mod(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
// --- Arithmetic (float path) ---
if (op == "add_float") {
if (op == "negate") {
p = fresh()
emit(qbe.add_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "sub_float") {
p = fresh()
emit(qbe.sub_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mul_float") {
p = fresh()
emit(qbe.mul_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "div_float") {
p = fresh()
emit(qbe.div_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mod_float") {
p = fresh()
emit(qbe.mod_float(p, "%ctx", s(a2), s(a3)))
emit(qbe.neg(p, "%ctx", s(a2)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue

View File

@@ -1,5 +1,4 @@
// regen.ce — regenerate .mcode bytecode files and pre-warm .mach cache
// Run with: ./cell --core . regen
var fd = use("fd")
var json = use("json")
@@ -20,7 +19,8 @@ var files = [
{src: "qbe_emit.cm", name: "qbe_emit", out: "boot/qbe_emit.cm.mcode"},
{src: "verify_ir.cm", name: "verify_ir", out: "boot/verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "boot/bootstrap.cm.mcode"},
{src: "internal/engine.cm", name: "engine", out: "boot/engine.cm.mcode"}
{src: "internal/engine.cm", name: "engine", out: "boot/engine.cm.mcode"},
{src: "boot/seed_bootstrap.cm", name: "seed_bootstrap", out: "boot/seed_bootstrap.cm.mcode"}
]
// Resolve shop_path for cache writes

View File

@@ -11,8 +11,9 @@
#include "cell_internal.h"
#include "cJSON.h"
#define BOOTSTRAP_MCODE "boot/bootstrap.cm.mcode"
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
#define BOOTSTRAP_MCODE "boot/bootstrap.cm.mcode"
#define SEED_BOOTSTRAP_MCODE "boot/seed_bootstrap.cm.mcode"
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
#define CELL_SHOP_DIR ".cell"
#define CELL_CORE_DIR "packages/core"
@@ -362,6 +363,7 @@ static void print_usage(const char *prog)
printf(" --core <path> Set core path directly (overrides CELL_CORE)\n");
printf(" --shop <path> Set shop path (overrides CELL_SHOP)\n");
printf(" --dev Dev mode (shop=.cell, core=.)\n");
printf(" --seed Use seed bootstrap (minimal, for regen)\n");
printf(" --test [heap_size] Run C test suite\n");
printf(" -h, --help Show this help message\n");
printf("\nEnvironment:\n");
@@ -394,6 +396,7 @@ int cell_init(int argc, char **argv)
/* Default: run script through bootstrap pipeline */
int arg_start = 1;
int seed_mode = 0;
const char *shop_override = NULL;
const char *core_override = NULL;
@@ -413,6 +416,9 @@ int cell_init(int argc, char **argv)
}
core_override = argv[arg_start + 1];
arg_start += 2;
} else if (strcmp(argv[arg_start], "--seed") == 0) {
seed_mode = 1;
arg_start++;
} else if (strcmp(argv[arg_start], "--dev") == 0) {
shop_override = ".cell";
core_override = ".";
@@ -438,8 +444,9 @@ int cell_init(int argc, char **argv)
actor_initialize();
const char *boot_mcode = seed_mode ? SEED_BOOTSTRAP_MCODE : BOOTSTRAP_MCODE;
size_t boot_size;
char *boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
char *boot_data = load_core_file(boot_mcode, &boot_size);
if (!boot_data) {
printf("ERROR: Could not load bootstrap from %s\n", core_path);
return 1;
@@ -460,7 +467,7 @@ int cell_init(int argc, char **argv)
free(bin_data);
return 1;
}
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, 16 * 1024 * 1024);
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, 1024 * 1024);
if (!ctx) {
printf("Failed to create JS context\n");
free(bin_data); JS_FreeRuntime(g_runtime);

View File

@@ -442,7 +442,7 @@ JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count) {
if (!frame) return NULL;
/* cap56 = slot count (used by gc_object_size) */
frame->hdr = objhdr_make(slot_count, OBJ_FRAME, 0, 0, 0, 0);
frame->header = objhdr_make(slot_count, OBJ_FRAME, 0, 0, 0, 0);
frame->function = JS_NULL;
frame->caller = JS_NULL;
frame->address = JS_NewInt32(ctx, 0);
@@ -767,9 +767,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
/* Setup initial frame — wrap top-level code in a function object so that
returning from a called register function can read code/env from frame */
JSValue top_fn = js_new_register_function(ctx, code, env_gc.val, of_gc.val);
JS_PopGCRef(ctx, &of_gc);
env = env_gc.val; /* refresh — GC may have moved env during allocation */
JS_PopGCRef(ctx, &env_gc);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
frame->function = top_fn;
frame->slots[0] = this_gc.val; /* slot 0 = this */
@@ -780,6 +778,8 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
for (int i = nargs_copy - 1; i >= 0; i--) JS_PopGCRef(ctx, &arg_gcs[i]);
JS_PopGCRef(ctx, &this_gc);
JS_PopGCRef(ctx, &of_gc);
JS_PopGCRef(ctx, &env_gc);
uint32_t pc = code->entry_point;
JSValue result = JS_NULL;
@@ -1262,100 +1262,6 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
/* === New mcode-derived opcodes === */
/* Typed integer arithmetic — inline with overflow to float */
case MACH_ADD_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
int64_t r = (int64_t)ia + (int64_t)ib;
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
break;
}
case MACH_SUB_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
int64_t r = (int64_t)ia - (int64_t)ib;
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
break;
}
case MACH_MUL_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
int64_t r = (int64_t)ia * (int64_t)ib;
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
break;
}
case MACH_DIV_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
if (ib == 0) { frame->slots[a] = JS_NULL; break; }
if (ia % ib == 0) frame->slots[a] = JS_NewInt32(ctx, ia / ib);
else frame->slots[a] = JS_NewFloat64(ctx, (double)ia / (double)ib);
break;
}
case MACH_MOD_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
if (ib == 0) { frame->slots[a] = JS_NULL; break; }
frame->slots[a] = JS_NewInt32(ctx, ia % ib);
break;
}
case MACH_NEG_INT: {
int32_t i = JS_VALUE_GET_INT(frame->slots[b]);
if (i == INT32_MIN)
frame->slots[a] = JS_NewFloat64(ctx, -(double)i);
else
frame->slots[a] = JS_NewInt32(ctx, -i);
break;
}
/* Typed float arithmetic */
case MACH_ADD_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
frame->slots[a] = JS_NewFloat64(ctx, da + db);
break;
}
case MACH_SUB_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
frame->slots[a] = JS_NewFloat64(ctx, da - db);
break;
}
case MACH_MUL_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
frame->slots[a] = JS_NewFloat64(ctx, da * db);
break;
}
case MACH_DIV_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
if (db == 0.0) { frame->slots[a] = JS_NULL; break; }
frame->slots[a] = JS_NewFloat64(ctx, da / db);
break;
}
case MACH_MOD_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
if (db == 0.0) { frame->slots[a] = JS_NULL; break; }
frame->slots[a] = JS_NewFloat64(ctx, fmod(da, db));
break;
}
case MACH_NEG_FLOAT: {
double d;
JS_ToFloat64(ctx, &d, frame->slots[b]);
frame->slots[a] = JS_NewFloat64(ctx, -d);
break;
}
/* Text concatenation */
case MACH_CONCAT: {
JSValue res = JS_ConcatString(ctx, frame->slots[b], frame->slots[c]);
@@ -1653,7 +1559,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
case MACH_INVOKE: {
/* A=frame_slot, B=result_slot */
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
int nr = (int)objhdr_cap56(fr->hdr);
int nr = (int)objhdr_cap56(fr->header);
int c_argc = (nr >= 2) ? nr - 2 : 0;
JSValue fn_val = fr->function;
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
@@ -1704,7 +1610,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
case MACH_GOINVOKE: {
/* Tail call: replace current frame with callee */
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
int nr = (int)objhdr_cap56(fr->hdr);
int nr = (int)objhdr_cap56(fr->header);
int c_argc = (nr >= 2) ? nr - 2 : 0;
JSValue fn_val = fr->function;
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
@@ -2022,7 +1928,7 @@ static int mcode_reg_items(cJSON *it, cJSON **out) {
{ ADD(1); return c; }
/* invoke: [1]=frame, [2]=dest (result register) */
if (!strcmp(op, "invoke")) { ADD(1); ADD(2); return c; }
if (!strcmp(op, "invoke") || !strcmp(op, "tail_invoke")) { ADD(1); ADD(2); return c; }
/* goinvoke: [1]=frame only (no result) */
if (!strcmp(op, "goinvoke")) { ADD(1); return c; }
@@ -2375,24 +2281,16 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
else if (strcmp(op, "false") == 0) { EM(MACH_ABC(MACH_LOADFALSE, A1, 0, 0)); }
else if (strcmp(op, "null") == 0) { EM(MACH_ABC(MACH_LOADNULL, A1, 0, 0)); }
else if (strcmp(op, "move") == 0) { AB2(MACH_MOVE); }
/* Typed integer arithmetic */
else if (strcmp(op, "add_int") == 0) { ABC3(MACH_ADD_INT); }
else if (strcmp(op, "sub_int") == 0) { ABC3(MACH_SUB_INT); }
else if (strcmp(op, "mul_int") == 0) { ABC3(MACH_MUL_INT); }
else if (strcmp(op, "div_int") == 0) { ABC3(MACH_DIV_INT); }
else if (strcmp(op, "mod_int") == 0) { ABC3(MACH_MOD_INT); }
else if (strcmp(op, "neg_int") == 0) { AB2(MACH_NEG_INT); }
/* Typed float arithmetic */
else if (strcmp(op, "add_float") == 0) { ABC3(MACH_ADD_FLOAT); }
else if (strcmp(op, "sub_float") == 0) { ABC3(MACH_SUB_FLOAT); }
else if (strcmp(op, "mul_float") == 0) { ABC3(MACH_MUL_FLOAT); }
else if (strcmp(op, "div_float") == 0) { ABC3(MACH_DIV_FLOAT); }
else if (strcmp(op, "mod_float") == 0) { ABC3(MACH_MOD_FLOAT); }
else if (strcmp(op, "neg_float") == 0) { AB2(MACH_NEG_FLOAT); }
/* Text */
else if (strcmp(op, "concat") == 0) { ABC3(MACH_CONCAT); }
/* Generic arithmetic */
else if (strcmp(op, "add") == 0) { ABC3(MACH_ADD); }
else if (strcmp(op, "subtract") == 0) { ABC3(MACH_SUB); }
else if (strcmp(op, "multiply") == 0) { ABC3(MACH_MUL); }
else if (strcmp(op, "divide") == 0) { ABC3(MACH_DIV); }
else if (strcmp(op, "modulo") == 0) { ABC3(MACH_MOD); }
else if (strcmp(op, "pow") == 0) { ABC3(MACH_POW); }
else if (strcmp(op, "negate") == 0) { AB2(MACH_NEG); }
/* Typed integer comparisons */
else if (strcmp(op, "eq_int") == 0) { ABC3(MACH_EQ_INT); }
else if (strcmp(op, "ne_int") == 0) { ABC3(MACH_NE_INT); }
@@ -2569,7 +2467,7 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
else if (strcmp(op, "setarg") == 0) {
EM(MACH_ABC(MACH_SETARG, A1, A2, A3));
}
else if (strcmp(op, "invoke") == 0) {
else if (strcmp(op, "invoke") == 0 || strcmp(op, "tail_invoke") == 0) {
EM(MACH_ABC(MACH_INVOKE, A1, A2, 0));
}
else if (strcmp(op, "goframe") == 0) {

View File

@@ -356,7 +356,7 @@ void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) {
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
int nr_slots = (int)objhdr_cap56(fr->hdr);
int nr_slots = (int)objhdr_cap56(fr->header);
int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
/* Copy args to C stack */

View File

@@ -115,6 +115,14 @@
#define gc_poison_region(addr, size) ((void)0)
#define gc_unpoison_region(addr, size) ((void)0)
#endif
#include <sys/mman.h>
#include <unistd.h>
static inline size_t poison_page_align(size_t size) {
size_t ps = (size_t)sysconf(_SC_PAGESIZE);
return (size + ps - 1) & ~(ps - 1);
}
#endif /* POISON_HEAP */
#ifdef HAVE_ASAN
@@ -361,13 +369,7 @@ struct JSClass {
#define JS_MODE_BACKTRACE_BARRIER \
(1 << 3) /* stop backtrace before this frame */
typedef struct JSFrameRegister {
objhdr_t hdr; // capacity in this is the total number of words of the object, including the 4 words of overhead and all slots
JSValue function; // JSFunction, function object being invoked
JSValue caller; // JSFrameRegister, the frame that called this one
JSValue address; // address of the instruction in the code that should be executed upon return
JSValue slots[]; // inline memory. order is [this][input args][closed over vars][non closed over vars][temporaries]
} JSFrameRegister; /// extra note: when this frame returns, caller should be set to 0. If caller is found to be 0, then the GC can reduce this frame's slots down to [this][input_args][closed over vars]; if no closed over vars it can be totally removed; may happen naturally in GC since it would have no refs?
/* JSFrameRegister is now an alias for JSFrame — see JSFrame definition below */
/* ============================================================
Register-Based VM Data Structures
@@ -554,22 +556,6 @@ typedef enum MachOpcode {
/* === New mcode-derived opcodes (1:1 mapping to mcode IR) === */
/* Typed integer arithmetic (ABC) */
MACH_ADD_INT, /* R(A) = R(B) + R(C) — int, overflow → float */
MACH_SUB_INT, /* R(A) = R(B) - R(C) — int */
MACH_MUL_INT, /* R(A) = R(B) * R(C) — int */
MACH_DIV_INT, /* R(A) = R(B) / R(C) — int */
MACH_MOD_INT, /* R(A) = R(B) % R(C) — int */
MACH_NEG_INT, /* R(A) = -R(B) — int (AB) */
/* Typed float arithmetic (ABC) */
MACH_ADD_FLOAT, /* R(A) = R(B) + R(C) — float */
MACH_SUB_FLOAT, /* R(A) = R(B) - R(C) — float */
MACH_MUL_FLOAT, /* R(A) = R(B) * R(C) — float */
MACH_DIV_FLOAT, /* R(A) = R(B) / R(C) — float */
MACH_MOD_FLOAT, /* R(A) = R(B) % R(C) — float */
MACH_NEG_FLOAT, /* R(A) = -R(B) — float (AB) */
/* Text */
MACH_CONCAT, /* R(A) = R(B) ++ R(C) — string concatenation */
@@ -723,18 +709,6 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = {
[MACH_NEQ_TOL] = "neq_tol",
[MACH_NOP] = "nop",
/* Mcode-derived */
[MACH_ADD_INT] = "add_int",
[MACH_SUB_INT] = "sub_int",
[MACH_MUL_INT] = "mul_int",
[MACH_DIV_INT] = "div_int",
[MACH_MOD_INT] = "mod_int",
[MACH_NEG_INT] = "neg_int",
[MACH_ADD_FLOAT] = "add_float",
[MACH_SUB_FLOAT] = "sub_float",
[MACH_MUL_FLOAT] = "mul_float",
[MACH_DIV_FLOAT] = "div_float",
[MACH_MOD_FLOAT] = "mod_float",
[MACH_NEG_FLOAT] = "neg_float",
[MACH_CONCAT] = "concat",
[MACH_EQ_INT] = "eq_int",
[MACH_NE_INT] = "ne_int",
@@ -823,17 +797,18 @@ typedef struct JSCodeRegister {
} JSCodeRegister;
/* Frame for closures - used by link-time relocation model where closures
reference outer frames via (depth, slot) addressing.
Stores function as JSValue to survive GC movements. */
/* Unified frame struct — used by the register VM and closures.
All fields are JSValues so the GC can scan them uniformly. */
typedef struct JSFrame {
objhdr_t header; /* OBJ_FRAME, cap56 = slot count */
JSValue function; /* JSValue for GC safety (use JS_VALUE_GET_FUNCTION) */
JSValue caller; /* JSValue for GC safety (unused currently) */
uint32_t return_pc;
JSValue function; /* JSFunction, function object being invoked */
JSValue caller; /* JSFrame, the frame that called this one */
JSValue address; /* return PC stored as JS_NewInt32 */
JSValue slots[]; /* [this][args][captured][locals][temps] */
} JSFrame;
typedef JSFrame JSFrameRegister;
static inline objhdr_t objhdr_set_s (objhdr_t h, bool s) {
return s ? (h | OBJHDR_S_MASK) : (h & ~OBJHDR_S_MASK);
}
@@ -903,7 +878,8 @@ typedef struct JSArray {
JSValue values[]; /* inline flexible array member */
} JSArray;
/* JSBlob - binary data per memory.md */
/* JSBlob — not allocated on GC heap (blobs use JSRecord + opaque).
Struct kept for reference; gc_object_size/gc_scan_object do not handle OBJ_BLOB. */
typedef struct JSBlob {
objhdr_t mist_hdr;
word_t length;
@@ -926,7 +902,7 @@ typedef slot JSRecordEntry;
typedef struct JSRecord {
objhdr_t mist_hdr;
struct JSRecord *proto;
JSValue proto; /* prototype as JSValue (JS_NULL if none) */
word_t len; /* number of entries */
slot slots[]; /* slots[0] reserved: key low32=class_id, key high32=rec_id, val=opaque */
} JSRecord;
@@ -1071,6 +1047,13 @@ static JS_BOOL JSText_equal_ascii (const JSText *text, JSValue imm) {
enough to call the interrupt callback often. */
#define JS_INTERRUPT_COUNTER_INIT 10000
/* Auto-rooted C call argv — GC updates values in-place */
typedef struct CCallRoot {
JSValue *argv; /* points to C-stack-local array */
int argc;
struct CCallRoot *prev; /* stack for nesting (C -> JS -> C -> ...) */
} CCallRoot;
struct JSContext {
JSRuntime *rt;
@@ -1081,18 +1064,18 @@ struct JSContext {
size_t current_block_size; /* current block size (64KB initially) */
size_t next_block_size; /* doubles if <10% recovered after GC */
/* Stone arena - permanent immutable allocations */
uint8_t *stone_base; /* stone arena base */
uint8_t *stone_free; /* stone arena bump pointer */
uint8_t *stone_end; /* stone arena end */
/* Constant text pool — compilation constants */
uint8_t *ct_base; /* pool base */
uint8_t *ct_free; /* pool bump pointer */
uint8_t *ct_end; /* pool end */
/* Stone text intern table */
void *st_pages; /* stone page list for large allocations */
uint32_t *st_text_hash; /* hash table (slot -> id) */
JSText **st_text_array; /* array of JSText pointers indexed by id */
uint32_t st_text_size; /* hash table size (power of 2) */
uint32_t st_text_count; /* number of interned texts */
uint32_t st_text_resize; /* threshold for resize */
/* Constant text intern table */
void *ct_pages; /* page list for large allocations */
uint32_t *ct_hash; /* hash table (slot -> id) */
JSText **ct_array; /* array of JSText pointers indexed by id */
uint32_t ct_size; /* hash table size (power of 2) */
uint32_t ct_count; /* number of interned texts */
uint32_t ct_resize_threshold; /* threshold for resize */
uint16_t binary_object_count;
int binary_object_size;
@@ -1102,6 +1085,7 @@ struct JSContext {
JSGCRef *top_gc_ref; /* used to reference temporary GC roots (stack top) */
JSGCRef *last_gc_ref; /* used to reference temporary GC roots (list) */
CCallRoot *c_call_root; /* stack of auto-rooted C call argv arrays */
int class_count; /* size of class_array and class_proto */
JSClass *class_array;
@@ -1161,22 +1145,22 @@ static inline const char *JS_KeyGetStr (JSContext *ctx, char *buf, size_t buf_si
/* ============================================================
Stone Arena Functions
Constant Text Pool Functions
============================================================ */
/* Stone page for large allocations */
typedef struct StonePage {
struct StonePage *next;
/* Constant text page for large allocations */
typedef struct CTPage {
struct CTPage *next;
size_t size;
uint8_t data[];
} StonePage;
} CTPage;
/* Initial stone text table size */
#define ST_TEXT_INITIAL_SIZE 256
/* Initial constant text table size */
#define CT_INITIAL_SIZE 256
/* Allocate from stone arena (permanent, immutable memory) */
/* Allocate from constant text pool */
/* Resize the stone text intern hash table */
/* Resize the constant text intern hash table */
/* Realloc with slack reporting (for bump allocator)
WARNING: This function is NOT GC-safe! The caller must protect the source
@@ -1192,9 +1176,9 @@ static int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size);
/* JS_MarkValue - mark a value during GC traversal.
With copying GC, this is a no-op as we discover live objects by tracing. */
/* Helper to check if a pointer is in stone memory */
static inline int is_stone_ptr (JSContext *ctx, void *ptr) {
return (uint8_t *)ptr >= ctx->stone_base && (uint8_t *)ptr < ctx->stone_end;
/* Helper to check if a pointer is in constant text pool memory */
static inline int is_ct_ptr (JSContext *ctx, void *ptr) {
return (uint8_t *)ptr >= ctx->ct_base && (uint8_t *)ptr < ctx->ct_end;
}
/* Intern a UTF-32 string as a stone text, returning a JSValue string */
@@ -1234,7 +1218,7 @@ typedef struct JSRegExp {
/* Get prototype from object (works for both JSRecord and JSRecord since they
* share layout) */
#define JS_OBJ_GET_PROTO(p) ((JSRecord *)((JSRecord *)(p))->proto)
#define JS_OBJ_GET_PROTO(p) (JS_IsNull(((JSRecord *)(p))->proto) ? NULL : (JSRecord *)JS_VALUE_GET_PTR(((JSRecord *)(p))->proto))
/* Initial capacity for new records (mask = 7, 8 slots total) */
#define JS_RECORD_INITIAL_MASK 7
@@ -1554,9 +1538,9 @@ JSText *pretext_concat_value (JSContext *ctx, JSText *s, JSValue v);
JSValue js_new_blob (JSContext *ctx, blob *b);
/* Functions from header region (defined in runtime.c) */
void *js_realloc (JSContext *ctx, void *ptr, size_t size);
void *st_alloc (JSContext *ctx, size_t bytes, size_t align);
void st_free_all (JSContext *ctx);
int st_text_resize (JSContext *ctx);
void *ct_alloc (JSContext *ctx, size_t bytes, size_t align);
void ct_free_all (JSContext *ctx);
int ct_resize (JSContext *ctx);
JSValue intern_text_to_value (JSContext *ctx, const uint32_t *utf32, uint32_t len);
JSValue js_key_new (JSContext *ctx, const char *str);
JSValue js_key_new_len (JSContext *ctx, const char *str, size_t len);

File diff suppressed because it is too large Load Diff

View File

@@ -35,13 +35,9 @@ var streamline = function(ir, log) {
var T_FUNCTION = "function"
var T_BLOB = "blob"
var int_result_ops = {
add_int: true, sub_int: true, mul_int: true,
div_int: true, mod_int: true
}
var float_result_ops = {
add_float: true, sub_float: true, mul_float: true,
div_float: true, mod_float: true
var numeric_ops = {
add: true, subtract: true, multiply: true,
divide: true, modulo: true, pow: true
}
var bool_result_ops = {
eq_int: true, ne_int: true, lt_int: true, gt_int: true,
@@ -137,17 +133,13 @@ var streamline = function(ir, log) {
} else if (op == "move") {
src_type = slot_types[text(instr[2])]
slot_types[text(instr[1])] = src_type != null ? src_type : T_UNKNOWN
} else if (int_result_ops[op] == true) {
slot_types[text(instr[1])] = T_INT
} else if (float_result_ops[op] == true) {
slot_types[text(instr[1])] = T_FLOAT
} else if (op == "concat") {
slot_types[text(instr[1])] = T_TEXT
} else if (bool_result_ops[op] == true) {
slot_types[text(instr[1])] = T_BOOL
} else if (op == "load_field" || op == "load_index" || op == "load_dynamic") {
slot_types[text(instr[1])] = T_UNKNOWN
} else if (op == "invoke") {
} else if (op == "invoke" || op == "tail_invoke") {
slot_types[text(instr[2])] = T_UNKNOWN
} else if (op == "pop" || op == "get") {
slot_types[text(instr[1])] = T_UNKNOWN
@@ -159,10 +151,8 @@ var streamline = function(ir, log) {
slot_types[text(instr[1])] = T_FUNCTION
} else if (op == "length") {
slot_types[text(instr[1])] = T_INT
} else if (op == "neg_int") {
slot_types[text(instr[1])] = T_INT
} else if (op == "neg_float") {
slot_types[text(instr[1])] = T_FLOAT
} else if (op == "negate" || numeric_ops[op] == true) {
slot_types[text(instr[1])] = T_UNKNOWN
} else if (op == "bitnot" || op == "bitand" || op == "bitor" ||
op == "bitxor" || op == "shl" || op == "shr" || op == "ushr") {
slot_types[text(instr[1])] = T_INT
@@ -219,6 +209,16 @@ var streamline = function(ir, log) {
return null
}
var seed_writes = function(slot_types, write_types) {
var keys = array(write_types)
var k = 0
while (k < length(keys)) {
slot_types[keys[k]] = write_types[keys[k]]
k = k + 1
}
return null
}
// =========================================================
// Pass: infer_param_types — backward type inference
// Scans typed operators to infer immutable parameter types.
@@ -246,24 +246,24 @@ var streamline = function(ir, log) {
instr = instructions[i]
if (is_array(instr)) {
op = instr[0]
if (op == "add_int" || op == "sub_int" || op == "mul_int" ||
op == "div_int" || op == "mod_int" ||
op == "eq_int" || op == "ne_int" || op == "lt_int" ||
op == "gt_int" || op == "le_int" || op == "ge_int" ||
op == "bitand" || op == "bitor" || op == "bitxor" ||
op == "shl" || op == "shr" || op == "ushr") {
if (op == "subtract" || op == "multiply" ||
op == "divide" || op == "modulo" || op == "pow") {
merge_backward(backward_types, instr[2], T_NUM)
merge_backward(backward_types, instr[3], T_NUM)
} else if (op == "negate") {
merge_backward(backward_types, instr[2], T_NUM)
} else if (op == "eq_int" || op == "ne_int" || op == "lt_int" ||
op == "gt_int" || op == "le_int" || op == "ge_int" ||
op == "bitand" || op == "bitor" || op == "bitxor" ||
op == "shl" || op == "shr" || op == "ushr") {
merge_backward(backward_types, instr[2], T_INT)
merge_backward(backward_types, instr[3], T_INT)
} else if (op == "neg_int" || op == "bitnot") {
} else if (op == "bitnot") {
merge_backward(backward_types, instr[2], T_INT)
} else if (op == "add_float" || op == "sub_float" || op == "mul_float" ||
op == "div_float" || op == "mod_float" ||
op == "eq_float" || op == "ne_float" || op == "lt_float" ||
} else if (op == "eq_float" || op == "ne_float" || op == "lt_float" ||
op == "gt_float" || op == "le_float" || op == "ge_float") {
merge_backward(backward_types, instr[2], T_FLOAT)
merge_backward(backward_types, instr[3], T_FLOAT)
} else if (op == "neg_float") {
merge_backward(backward_types, instr[2], T_FLOAT)
} else if (op == "concat" ||
op == "eq_text" || op == "ne_text" || op == "lt_text" ||
op == "gt_text" || op == "le_text" || op == "ge_text") {
@@ -284,6 +284,13 @@ var streamline = function(ir, log) {
merge_backward(backward_types, instr[1], T_RECORD)
} else if (op == "push") {
merge_backward(backward_types, instr[1], T_ARRAY)
} else if (op == "load_index") {
merge_backward(backward_types, instr[2], T_ARRAY)
merge_backward(backward_types, instr[3], T_INT)
} else if (op == "load_field") {
merge_backward(backward_types, instr[2], T_RECORD)
} else if (op == "pop") {
merge_backward(backward_types, instr[2], T_ARRAY)
}
}
i = i + 1
@@ -301,15 +308,131 @@ var streamline = function(ir, log) {
return param_types
}
// =========================================================
// Pass: infer_slot_write_types — slot write-type invariance
// Scans all instructions to find non-parameter slots where
// every write produces the same type. These types persist
// across label join points.
// =========================================================
var infer_slot_write_types = function(func) {
var instructions = func.instructions
var nr_args = func.nr_args != null ? func.nr_args : 0
var num_instr = 0
var write_types = null
var result = null
var keys = null
var i = 0
var k = 0
var instr = null
var op = null
var slot = 0
var typ = null
var wt = null
if (instructions == null) {
return {}
}
num_instr = length(instructions)
write_types = {}
i = 0
while (i < num_instr) {
instr = instructions[i]
if (!is_array(instr)) {
i = i + 1
continue
}
op = instr[0]
slot = -1
typ = null
if (op == "int") {
slot = instr[1]
typ = T_INT
} else if (op == "true" || op == "false") {
slot = instr[1]
typ = T_BOOL
} else if (op == "null") {
slot = instr[1]
typ = T_NULL
} else if (op == "access") {
slot = instr[1]
typ = access_value_type(instr[2])
} else if (op == "array") {
slot = instr[1]
typ = T_ARRAY
} else if (op == "record") {
slot = instr[1]
typ = T_RECORD
} else if (op == "function") {
slot = instr[1]
typ = T_FUNCTION
} else if (op == "length") {
slot = instr[1]
typ = T_INT
} else if (op == "bitnot" || op == "bitand" ||
op == "bitor" || op == "bitxor" || op == "shl" ||
op == "shr" || op == "ushr") {
slot = instr[1]
typ = T_INT
} else if (op == "negate") {
slot = instr[1]
typ = T_UNKNOWN
} else if (op == "concat") {
slot = instr[1]
typ = T_TEXT
} else if (bool_result_ops[op] == true) {
slot = instr[1]
typ = T_BOOL
} else if (op == "eq" || op == "ne" || op == "lt" ||
op == "le" || op == "gt" || op == "ge" || op == "in") {
slot = instr[1]
typ = T_BOOL
} else if (op == "add" || op == "subtract" || op == "multiply" ||
op == "divide" || op == "modulo" || op == "pow") {
slot = instr[1]
typ = T_UNKNOWN
} else if (op == "move" || op == "load_field" || op == "load_index" ||
op == "load_dynamic" || op == "pop" || op == "get") {
slot = instr[1]
typ = T_UNKNOWN
} else if (op == "invoke" || op == "tail_invoke") {
slot = instr[2]
typ = T_UNKNOWN
}
if (slot > 0 && slot > nr_args) {
merge_backward(write_types, slot, typ != null ? typ : T_UNKNOWN)
}
i = i + 1
}
// Filter to only slots with known (non-unknown) types
result = {}
keys = array(write_types)
k = 0
while (k < length(keys)) {
wt = write_types[keys[k]]
if (wt != null && wt != T_UNKNOWN) {
result[keys[k]] = wt
}
k = k + 1
}
return result
}
// =========================================================
// Pass: eliminate_type_checks — language-level type narrowing
// Eliminates is_<type>/jump pairs when type is known.
// Reduces load_dynamic/store_dynamic to field/index forms.
// =========================================================
var eliminate_type_checks = function(func, param_types, log) {
var eliminate_type_checks = function(func, param_types, write_types, log) {
var instructions = func.instructions
var nr_args = func.nr_args != null ? func.nr_args : 0
var has_params = false
var has_writes = false
var num_instr = 0
var slot_types = null
var nc = 0
@@ -344,11 +467,15 @@ var streamline = function(ir, log) {
}
j = j + 1
}
has_writes = length(array(write_types)) > 0
slot_types = {}
if (has_params) {
seed_params(slot_types, param_types, nr_args)
}
if (has_writes) {
seed_writes(slot_types, write_types)
}
i = 0
while (i < num_instr) {
@@ -359,6 +486,9 @@ var streamline = function(ir, log) {
if (has_params) {
seed_params(slot_types, param_types, nr_args)
}
if (has_writes) {
seed_writes(slot_types, write_types)
}
i = i + 1
continue
}
@@ -603,11 +733,8 @@ var streamline = function(ir, log) {
var instr = null
var op = null
var ilen = 0
var v2 = null
var v3 = null
var sv = null
var events = null
var rule = null
if (instructions == null || length(instructions) == 0) {
return null
@@ -655,179 +782,6 @@ var streamline = function(ir, log) {
}
}
// Integer: x+0, x-0 → move x
if (op == "add_int" || op == "sub_int") {
v3 = slot_values[text(instr[3])]
if (v3 == 0) {
rule = op == "add_int" ? "add_zero" : "sub_zero"
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: rule, at: i,
before: instr, after: instructions[i],
why: {slot: instr[3], value: 0}
}
}
i = i + 1
continue
}
if (op == "add_int") {
v2 = slot_values[text(instr[2])]
if (v2 == 0) {
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "add_zero", at: i,
before: instr, after: instructions[i],
why: {slot: instr[2], value: 0}
}
}
i = i + 1
continue
}
}
} else if (op == "mul_int") {
v3 = slot_values[text(instr[3])]
v2 = slot_values[text(instr[2])]
if (v3 == 1) {
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "mul_one", at: i,
before: instr, after: instructions[i],
why: {slot: instr[3], value: 1}
}
}
i = i + 1
continue
}
if (v2 == 1) {
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "mul_one", at: i,
before: instr, after: instructions[i],
why: {slot: instr[2], value: 1}
}
}
i = i + 1
continue
}
if (v3 == 0 || v2 == 0) {
instructions[i] = ["int", instr[1], 0, instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "mul_zero", at: i,
before: instr, after: instructions[i],
why: {value: 0}
}
}
slot_values[text(instr[1])] = 0
i = i + 1
continue
}
} else if (op == "div_int") {
v3 = slot_values[text(instr[3])]
if (v3 == 1) {
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "div_one", at: i,
before: instr, after: instructions[i],
why: {slot: instr[3], value: 1}
}
}
i = i + 1
continue
}
}
// Float: x+0, x-0 → move x; x*1, x/1 → move x
// (skip mul_float * 0 — not safe with NaN/Inf)
if (op == "add_float" || op == "sub_float") {
v3 = slot_values[text(instr[3])]
if (v3 == 0) {
rule = op == "add_float" ? "add_zero" : "sub_zero"
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: rule, at: i,
before: instr, after: instructions[i],
why: {slot: instr[3], value: 0}
}
}
i = i + 1
continue
}
if (op == "add_float") {
v2 = slot_values[text(instr[2])]
if (v2 == 0) {
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "add_zero", at: i,
before: instr, after: instructions[i],
why: {slot: instr[2], value: 0}
}
}
i = i + 1
continue
}
}
} else if (op == "mul_float") {
v3 = slot_values[text(instr[3])]
v2 = slot_values[text(instr[2])]
if (v3 == 1) {
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "mul_one", at: i,
before: instr, after: instructions[i],
why: {slot: instr[3], value: 1}
}
}
i = i + 1
continue
}
if (v2 == 1) {
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "mul_one", at: i,
before: instr, after: instructions[i],
why: {slot: instr[2], value: 1}
}
}
i = i + 1
continue
}
} else if (op == "div_float") {
v3 = slot_values[text(instr[3])]
if (v3 == 1) {
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
if (events != null) {
events[] = {
event: "rewrite", pass: "simplify_algebra",
rule: "div_one", at: i,
before: instr, after: instructions[i],
why: {slot: instr[3], value: 1}
}
}
i = i + 1
continue
}
}
// Same-slot comparisons
if (is_number(instr[2]) && instr[2] == instr[3]) {
if (op == "eq_int" || op == "eq_float" || op == "eq_text" ||
@@ -867,7 +821,7 @@ var streamline = function(ir, log) {
}
// Clear value tracking for dest-producing ops (not reads-only)
if (op == "invoke") {
if (op == "invoke" || op == "tail_invoke") {
slot_values[text(instr[2])] = null
} else if (op != "int" && op != "access" && op != "true" &&
op != "false" && op != "move" && op != "null" &&
@@ -1050,7 +1004,7 @@ var streamline = function(ir, log) {
if (after_return) {
nc = nc + 1
instructions[i] = "_nop_ur_" + text(nc)
} else if (instr[0] == "return" || instr[0] == "disrupt") {
} else if (instr[0] == "return") {
after_return = true
}
}
@@ -1123,6 +1077,7 @@ var streamline = function(ir, log) {
// =========================================================
var optimize_function = function(func, log) {
var param_types = null
var write_types = null
var slot_types = null
if (func.instructions == null || length(func.instructions) == 0) {
return null
@@ -1132,8 +1087,13 @@ var streamline = function(ir, log) {
return param_types
})
if (verify_fn) verify_fn(func, "after infer_param_types")
run_pass(func, "infer_slot_write_types", function() {
write_types = infer_slot_write_types(func)
return write_types
})
if (verify_fn) verify_fn(func, "after infer_slot_write_types")
run_pass(func, "eliminate_type_checks", function() {
slot_types = eliminate_type_checks(func, param_types, log)
slot_types = eliminate_type_checks(func, param_types, write_types, log)
return slot_types
})
if (verify_fn) verify_fn(func, "after eliminate_type_checks")
@@ -1156,10 +1116,10 @@ var streamline = function(ir, log) {
return eliminate_moves(func, log)
})
if (verify_fn) verify_fn(func, "after eliminate_moves")
// NOTE: eliminate_unreachable is disabled because disruption handler
// code is placed after return/disrupt without label boundaries.
// Re-enable once mcode.cm emits labels for handler entry points.
//eliminate_unreachable(func)
run_pass(func, "eliminate_unreachable", function() {
return eliminate_unreachable(func)
})
if (verify_fn) verify_fn(func, "after eliminate_unreachable")
run_pass(func, "eliminate_dead_jumps", function() {
return eliminate_dead_jumps(func, log)
})

View File

@@ -18,8 +18,7 @@ var slot_positions = {
// Unary — dest, src
move: [0, 1],
not: [0, 1],
neg_int: [0, 1],
neg_float: [0, 1],
negate: [0, 1],
bitnot: [0, 1],
length: [0, 1],
typeof: [0, 1],
@@ -41,16 +40,6 @@ var slot_positions = {
divide: [0, 1, 2],
modulo: [0, 1, 2],
pow: [0, 1, 2],
add_int: [0, 1, 2],
sub_int: [0, 1, 2],
mul_int: [0, 1, 2],
div_int: [0, 1, 2],
mod_int: [0, 1, 2],
add_float: [0, 1, 2],
sub_float: [0, 1, 2],
mul_float: [0, 1, 2],
div_float: [0, 1, 2],
mod_float: [0, 1, 2],
eq: [0, 1, 2],
ne: [0, 1, 2],
lt: [0, 1, 2],
@@ -113,6 +102,7 @@ var slot_positions = {
// Invoke
invoke: [0, 1],
tail_invoke: [0, 1],
goinvoke: [0],
frame: [0, 1],
setarg: [0, 2]
@@ -122,15 +112,13 @@ var slot_positions = {
var writes_dest = {
access: true, int: true, true: true, false: true, null: true,
function: true, array: true, record: true,
move: true, not: true, neg_int: true, neg_float: true, bitnot: true,
move: true, not: true, negate: true, bitnot: true,
length: true, typeof: true,
is_int: true, is_text: true, is_num: true,
is_bool: true, is_null: true, is_array: true,
is_func: true, is_record: true, is_stone: true, is_identical: true,
add: true, subtract: true, multiply: true, divide: true,
modulo: true, pow: true,
add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
eq: true, ne: true, lt: true, le: true, gt: true, ge: true,
eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
@@ -141,7 +129,8 @@ var writes_dest = {
in: true,
load_index: true, load_dynamic: true, load_field: true,
pop: true, get: true,
invoke: true
invoke: true,
tail_invoke: true
}
// Opcodes where invoke writes to position 1 (result slot), not position 0
@@ -260,14 +249,10 @@ var check_type_consistency = function(func) {
var T_BOOL = "bool"
var int_ops = {
add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
neg_int: true
eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true
}
var float_ops = {
add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
neg_float: true
eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true
}
var text_ops = {
eq_text: true, ne_text: true, lt_text: true, gt_text: true, le_text: true, ge_text: true,