Merge branch 'mcode_streamline' into runtime_rework
This commit is contained in:
13
CLAUDE.md
13
CLAUDE.md
@@ -113,6 +113,19 @@ var v = a[] // pop: v is 3, a is [1, 2]
|
||||
- `packages/` — core packages
|
||||
- `Makefile` — build system (`make` to rebuild, `make bootstrap` for first build)
|
||||
|
||||
## Testing
|
||||
|
||||
After any C runtime changes, run all three test suites before considering the work done:
|
||||
|
||||
```
|
||||
make # rebuild
|
||||
./cell --dev vm_suite # VM-level tests (641 tests)
|
||||
./cell --dev test suite # language-level tests (493 tests)
|
||||
./cell --dev fuzz # fuzzer (100 iterations)
|
||||
```
|
||||
|
||||
All three must pass with 0 failures.
|
||||
|
||||
## Documentation
|
||||
|
||||
The `docs/` folder is the single source of truth. The website at `website/` mounts it via Hugo. Key files:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
8041
boot/engine.cm.mcode
8041
boot/engine.cm.mcode
File diff suppressed because it is too large
Load Diff
44083
boot/fold.cm.mcode
44083
boot/fold.cm.mcode
File diff suppressed because it is too large
Load Diff
48892
boot/mcode.cm.mcode
48892
boot/mcode.cm.mcode
File diff suppressed because it is too large
Load Diff
48984
boot/parse.cm.mcode
48984
boot/parse.cm.mcode
File diff suppressed because it is too large
Load Diff
1910
boot/qbe.cm.mcode
1910
boot/qbe.cm.mcode
File diff suppressed because it is too large
Load Diff
21700
boot/qbe_emit.cm.mcode
21700
boot/qbe_emit.cm.mcode
File diff suppressed because it is too large
Load Diff
153
boot/seed_bootstrap.cm
Normal file
153
boot/seed_bootstrap.cm
Normal file
@@ -0,0 +1,153 @@
|
||||
// seed_bootstrap.cm — minimal bootstrap for regenerating boot files
|
||||
// Loads only the compiler pipeline, runs a script directly (no engine/actors)
|
||||
// Usage: ./cell --dev --seed regen
|
||||
//
|
||||
// Hidden env: os, core_path, shop_path, args, json
|
||||
|
||||
// Loader for C-embedded modules, taken from the hidden `os` environment object.
var load_internal = os.load_internal
var fd = load_internal("js_fd_use")

// Module cache consulted by use_basic and use_fn. It starts out holding the
// modules that are already in hand: fd (loaded above) plus os and json.
var use_cache = {fd: fd, os: os, json: json}
|
||||
|
||||
// Minimal `use` for the boot modules: answers from use_cache when possible,
// otherwise tries the C embed named "js_<path with '/' replaced by '_'>_use".
// A successful embed load is cached. Returns null when neither source has the module.
function use_basic(path) {
  var cached = use_cache[path]
  var embedded = null

  if (cached)
    return cached

  embedded = load_internal("js_" + replace(path, '/', '_') + "_use")
  if (!embedded)
    return null

  use_cache[path] = embedded
  return embedded
}
|
||||
|
||||
// Evaluate <core_path>/boot/<name>.cm.mcode and return the resulting module.
// The result is not cached here. The evaluated module gets use_basic as its `use`.
// A missing file prints a diagnostic and disrupts.
function boot_load(name) {
  var boot_file = core_path + '/boot/' + name + ".cm.mcode"

  if (fd.is_file(boot_file))
    return mach_eval_mcode(name, text(fd.slurp(boot_file)), {use: use_basic})

  print("seed: missing boot mcode: " + boot_file + "\n")
  disrupt
}
|
||||
|
||||
// Load the compiler pipeline stages from their boot .mcode files.
// Each stage is evaluated with use_basic as its `use`, so while these loads run
// only the entries already in use_cache (fd, os, json) and C embeds are resolvable.
var tokenize_mod = boot_load("tokenize")
|
||||
var parse_mod = boot_load("parse")
|
||||
var fold_mod = boot_load("fold")
|
||||
var mcode_mod = boot_load("mcode")
|
||||
var streamline_mod = boot_load("streamline")
|
||||
|
||||
// Register the stages only after all five are loaded, so that later use_fn /
// use_basic lookups for these names return the already-evaluated modules.
use_cache['tokenize'] = tokenize_mod
|
||||
use_cache['parse'] = parse_mod
|
||||
use_cache['fold'] = fold_mod
|
||||
use_cache['mcode'] = mcode_mod
|
||||
use_cache['streamline'] = streamline_mod
|
||||
|
||||
// Tokenize, parse and fold `src`, returning the folded AST.
// If the parser reported errors, each is printed as
// "<file>:<line>:<column>: error: <message>" (or "<file>: error: <message>"
// when the error carries no line) and the function disrupts.
function analyze(src, filename) {
  var tokens_out = tokenize_mod(src, filename)
  var tree = parse_mod(tokens_out.tokens, src, filename, tokenize_mod)
  var idx = 0
  var err = null

  // Clean parse: nothing to report, go straight to folding.
  if (tree.errors == null || length(tree.errors) <= 0)
    return fold_mod(tree)

  while (idx < length(tree.errors)) {
    err = tree.errors[idx]
    if (err.line == null) {
      print(`${filename}: error: ${err.message}`)
    } else {
      print(`${filename}:${text(err.line)}:${text(err.column)}: error: ${err.message}`)
    }
    idx = idx + 1
  }
  disrupt
}
|
||||
|
||||
// Lower a folded AST to mcode, run it through the streamline optimizer,
// JSON-encode the result and evaluate it under `name` with environment `env`.
// Returns whatever mach_eval_mcode returns.
function run_ast(name, ast, env) {
  var optimized_ir = streamline_mod(mcode_mod(ast))
  return mach_eval_mcode(name, json.encode(optimized_ir), env)
}
|
||||
|
||||
// Full `use` for programs run by the seed bootstrap.
// Lookup order:
//   1. use_cache, then the C embed (both handled by use_basic, which also
//      caches a successful embed load)
//   2. boot mcode at <core_path>/boot/<path>.cm.mcode
//   3. .cm source: <path>.cm in the CWD, else <core_path>/<path>.cm
// Modules loaded in steps 2 and 3 are evaluated with use_fn as their `use`
// and cached. An unresolvable path prints a diagnostic and disrupts.
function use_fn(path) {
  var loaded = use_basic(path)
  var boot_file = null
  var source_file = null
  var source_text = null
  var tree = null

  if (loaded)
    return loaded

  // Try boot mcode
  boot_file = core_path + '/boot/' + path + '.cm.mcode'
  if (fd.is_file(boot_file)) {
    loaded = mach_eval_mcode(path, text(fd.slurp(boot_file)), {use: use_fn})
    use_cache[path] = loaded
    return loaded
  }

  // Try .cm source (CWD then core)
  source_file = path + '.cm'
  if (!fd.is_file(source_file))
    source_file = core_path + '/' + path + '.cm'
  if (fd.is_file(source_file)) {
    source_text = text(fd.slurp(source_file))
    tree = analyze(source_text, source_file)
    loaded = run_ast(path, tree, {use: use_fn})
    use_cache[path] = loaded
    return loaded
  }

  print("seed: module not found: " + path + "\n")
  disrupt
}
|
||||
|
||||
// Entry point: args[0] names the program to run; the remaining args are
// forwarded to it as its own `args`.
var program = args[0]
var user_args = []
var _j = 1
var prog_path = null
var script = null
var ast = null
var candidates = null
var _k = 0

if (!program) {
  print("seed: no program specified\n")
  disrupt
}

while (_j < length(args)) {
  push(user_args, args[_j])
  _j = _j + 1
}

// Search order: CWD .ce, core .ce, CWD .cm, core .cm — the first existing file wins.
candidates = [
  program + '.ce',
  core_path + '/' + program + '.ce',
  program + '.cm',
  core_path + '/' + program + '.cm'
]
while (prog_path == null && _k < length(candidates)) {
  if (fd.is_file(candidates[_k]))
    prog_path = candidates[_k]
  _k = _k + 1
}

if (prog_path == null) {
  print("seed: program not found: " + program + "\n")
  disrupt
}

script = text(fd.slurp(prog_path))
ast = analyze(script, prog_path)
run_ast(program, ast, {use: use_fn, args: user_args})
|
||||
1344
boot/seed_bootstrap.cm.mcode
Normal file
1344
boot/seed_bootstrap.cm.mcode
Normal file
File diff suppressed because it is too large
Load Diff
35112
boot/streamline.cm.mcode
35112
boot/streamline.cm.mcode
File diff suppressed because it is too large
Load Diff
10861
boot/tokenize.cm.mcode
10861
boot/tokenize.cm.mcode
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
42
bootstrap.ce
42
bootstrap.ce
@@ -1,42 +0,0 @@
|
||||
// bootstrap.ce — regenerate .mach bytecode files consumed by the mach engine
// usage: cell bootstrap.ce

var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")

// One entry per bytecode file: source path, module name handed to the
// compiler, and output path.
var files = [
  {src: "tokenize.cm", name: "tokenize", out: "tokenize.mach"},
  {src: "parse.cm", name: "parse", out: "parse.mach"},
  {src: "fold.cm", name: "fold", out: "fold.mach"},
  {src: "mcode.cm", name: "mcode", out: "mcode.mach"},
  {src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"},
  {src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"}
]

// Compile one entry: tokenize -> parse -> fold -> JSON-encode -> mach_compile_ast,
// then write the bytecode to entry.out and report it.
function regenerate(entry) {
  var source = text(fd.slurp(entry.src))
  var tok_result = tokenize(source, entry.src)
  var folded = fold(parse(tok_result.tokens, source, entry.src, tokenize))
  var bytecode = mach_compile_ast(entry.name, json.encode(folded))
  var out_file = fd.open(entry.out, "w")
  fd.write(out_file, bytecode)
  fd.close(out_file)
  print(`wrote ${entry.out}`)
}

var i = 0
while (i < length(files)) {
  regenerate(files[i])
  i = i + 1
}
|
||||
@@ -27,7 +27,8 @@ Splits source text into tokens. Handles string interpolation by re-tokenizing te
|
||||
Converts tokens into an AST. Also performs semantic analysis:
|
||||
|
||||
- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`.
|
||||
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`, `"null"`.
|
||||
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`. For `def` variables, type tags are also inferred from usage patterns: push (`x[] = v`) implies array, property access (`x.foo = v`) implies record, integer key implies array, text key implies record.
|
||||
- **Type error detection**: For `def` variables with known type tags, provably wrong operations are reported as compile errors: property access on arrays, push on non-arrays, text keys on arrays, integer keys on records. Only `def` variables are checked because `var` can be reassigned.
|
||||
- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`.
|
||||
- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for numeric subscripts, `"field"` for string subscripts, omitted otherwise.
|
||||
- **Tail position**: Return statements where the expression is a call get `tail: true`.
|
||||
@@ -40,8 +41,8 @@ Operates on the AST. Performs constant folding and type analysis:
|
||||
- **Constant propagation**: Tracks `def` bindings whose values are known constants.
|
||||
- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites.
|
||||
- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where x is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`.
|
||||
- **Purity marking**: Stamps `pure: true` on expressions with no side effects (literals, name references, arithmetic on pure operands).
|
||||
- **Dead code elimination**: Removes unreachable branches when conditions are known constants.
|
||||
- **Purity analysis**: Expressions with no side effects are marked pure (literals, name references, arithmetic on pure operands, calls to pure intrinsics). The pure intrinsic set contains only `is_*` sensory functions — they are the only intrinsics guaranteed to never disrupt regardless of argument types. Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types and are excluded.
|
||||
- **Dead code elimination**: Removes unreachable branches when conditions are known constants. Removes unused `var`/`def` declarations with pure initializers. Removes standalone calls to pure intrinsics where the result is discarded.
|
||||
|
||||
### Mcode (`mcode.cm`)
|
||||
|
||||
@@ -51,6 +52,8 @@ Lowers the AST to a JSON-based intermediate representation with explicit operati
|
||||
- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call).
|
||||
- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup.
|
||||
- **Intrinsic inlining**: Type-check intrinsics (`is_array`, `is_text`, `is_number`, `is_integer`, `is_logical`, `is_null`, `is_function`, `is_object`, `is_stone`), `length`, and `push` are emitted as direct opcodes instead of frame/setarg/invoke call sequences.
|
||||
- **Disruption handler labels**: When a function has a disruption handler, a label is emitted before the handler code. This allows the streamline optimizer's unreachable code elimination to safely nop dead code after `return` without accidentally eliminating the handler.
|
||||
- **Tail call marking**: When a return statement's expression is a call and the function has no disruption handler, the final `invoke` is renamed to `tail_invoke`. This marks the call site for future tail call optimization. Functions with disruption handlers cannot use TCO because the handler frame must remain on the stack.
|
||||
|
||||
See [Mcode IR](mcode.md) for the instruction format and complete instruction reference.
|
||||
|
||||
@@ -58,12 +61,13 @@ See [Mcode IR](mcode.md) for the instruction format and complete instruction ref
|
||||
|
||||
Optimizes the Mcode IR through a series of independent passes. Operates per-function:
|
||||
|
||||
1. **Backward type inference**: Infers parameter types from how they are used in typed operators. Immutable `def` parameters keep their inferred type across label join points.
|
||||
1. **Backward type inference**: Infers parameter types from how they are used in typed operators (`add_int`, `store_index`, `load_field`, `push`, `pop`, etc.). Immutable `def` parameters keep their inferred type across label join points.
|
||||
2. **Type-check elimination**: When a slot's type is known, eliminates `is_<type>` + conditional jump pairs. Narrows `load_dynamic`/`store_dynamic` to typed variants.
|
||||
3. **Algebraic simplification**: Folds same-slot comparisons. Identity rewrites (add 0, multiply 1, divide 1) are not currently performed under unified arithmetic; see the Streamline Optimizer document.
|
||||
4. **Boolean simplification**: Fuses `not` + conditional jump into a single jump with inverted condition.
|
||||
5. **Move elimination**: Removes self-moves (`move a, a`).
|
||||
6. **Dead jump elimination**: Removes jumps to the immediately following label.
|
||||
6. **Unreachable elimination**: Nops dead code after `return` until the next label.
|
||||
7. **Dead jump elimination**: Removes jumps to the immediately following label.
|
||||
|
||||
See [Streamline Optimizer](streamline.md) for detailed pass descriptions.
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ Subsumption: `int` and `float` both satisfy a `num` check.
|
||||
|
||||
### 1. infer_param_types (backward type inference)
|
||||
|
||||
Scans all typed operators to determine what types their operands must be. For example, `add_int dest, a, b` implies both `a` and `b` are integers.
|
||||
Scans typed operators and generic arithmetic to determine what types their operands must be. For example, `subtract dest, a, b` implies both `a` and `b` are numbers.
|
||||
|
||||
When a parameter slot (1..nr_args) is consistently inferred as a single type, that type is recorded. Since parameters are immutable (`def`), the inferred type holds for the entire function and persists across label join points (loop headers, branch targets).
|
||||
|
||||
@@ -45,20 +45,67 @@ Backward inference rules:
|
||||
|
||||
| Operator class | Operand type inferred |
|
||||
|---|---|
|
||||
| `add_int`, `sub_int`, `mul_int`, `div_int`, `mod_int`, `eq_int`, comparisons, bitwise | T_INT |
|
||||
| `add_float`, `sub_float`, `mul_float`, `div_float`, `mod_float`, float comparisons | T_FLOAT |
|
||||
| `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate` | T_NUM |
|
||||
| `eq_int`, `ne_int`, `lt_int`, `gt_int`, `le_int`, `ge_int`, bitwise ops | T_INT |
|
||||
| `eq_float`, `ne_float`, `lt_float`, `gt_float`, `le_float`, `ge_float` | T_FLOAT |
|
||||
| `concat`, text comparisons | T_TEXT |
|
||||
| `eq_bool`, `ne_bool`, `not`, `and`, `or` | T_BOOL |
|
||||
| `store_index` (object operand) | T_ARRAY |
|
||||
| `store_index` (index operand) | T_INT |
|
||||
| `store_field` (object operand) | T_RECORD |
|
||||
| `push` (array operand) | T_ARRAY |
|
||||
| `load_index` (object operand) | T_ARRAY |
|
||||
| `load_index` (index operand) | T_INT |
|
||||
| `load_field` (object operand) | T_RECORD |
|
||||
| `pop` (array operand) | T_ARRAY |
|
||||
|
||||
When a slot appears with conflicting type inferences (e.g., used in both `add_int` and `concat` across different type-dispatch branches), the result is `unknown`. INT + FLOAT conflicts produce `num`.
|
||||
Note: `add` is excluded from backward inference because it is polymorphic — it handles both numeric addition and text concatenation. Only operators that are unambiguously numeric can infer T_NUM.
|
||||
|
||||
When a slot appears with conflicting type inferences, the result is `unknown`. INT + FLOAT conflicts produce `num`.
|
||||
|
||||
**Nop prefix:** none (analysis only, does not modify instructions)
|
||||
|
||||
### 2. eliminate_type_checks (type-check + jump elimination)
|
||||
### 2. infer_slot_write_types (slot write-type invariance)
|
||||
|
||||
Scans all instructions to determine which non-parameter slots have a consistent write type. If every instruction that writes to a given slot produces the same type, that type is globally invariant and can safely persist across label join points.
|
||||
|
||||
This analysis is sound because:
|
||||
- `alloc_slot()` in mcode.cm is monotonically increasing — temp slots are never reused
|
||||
- All local variable declarations must be at function body level and initialized — slots are written before any backward jumps to loop headers
|
||||
- `move` is conservatively treated as T_UNKNOWN, avoiding unsound transitive assumptions
|
||||
|
||||
Write type mapping:
|
||||
|
||||
| Instruction class | Write type |
|
||||
|---|---|
|
||||
| `int` | T_INT |
|
||||
| `true`, `false` | T_BOOL |
|
||||
| `null` | T_NULL |
|
||||
| `access` | type of literal value |
|
||||
| `array` | T_ARRAY |
|
||||
| `record` | T_RECORD |
|
||||
| `function` | T_FUNCTION |
|
||||
| `length` | T_INT |
|
||||
| bitwise ops | T_INT |
|
||||
| `concat` | T_TEXT |
|
||||
| bool ops, comparisons, `in` | T_BOOL |
|
||||
| generic arithmetic (`add`, `subtract`, `negate`, etc.) | T_UNKNOWN |
|
||||
| `move`, `load_field`, `load_index`, `load_dynamic`, `pop`, `get` | T_UNKNOWN |
|
||||
| `invoke`, `tail_invoke` | T_UNKNOWN |
|
||||
|
||||
The result is a map of slot→type for slots where all writes agree on a single known type. Parameter slots (1..nr_args) and slot 0 are excluded.
|
||||
|
||||
Common patterns this enables:
|
||||
|
||||
- **Length variables** (`var len = length(arr)`): written by `length` (T_INT) only → invariant T_INT
|
||||
- **Boolean flags** (`var found = false; ... found = true`): written by `false` and `true` → invariant T_BOOL
|
||||
- **Locally-created containers** (`var arr = []`): written by `array` only → invariant T_ARRAY
|
||||
|
||||
Note: Loop counters (`var i = 0; i = i + 1`) are NOT invariant because `add` produces T_UNKNOWN. However, if `i` is a function parameter used in arithmetic, backward inference from `subtract`/`multiply`/etc. will infer T_NUM for it, which persists across labels.
|
||||
|
||||
**Nop prefix:** none (analysis only, does not modify instructions)
|
||||
|
||||
### 3. eliminate_type_checks (type-check + jump elimination)
|
||||
|
||||
Forward pass that tracks the known type of each slot. When a type check (`is_int`, `is_text`, `is_num`, etc.) is followed by a conditional jump, and the slot's type is already known, the check and jump can be eliminated or converted to an unconditional jump.
|
||||
|
||||
@@ -70,30 +117,13 @@ Three cases:
|
||||
|
||||
This pass also reduces `load_dynamic`/`store_dynamic` to `load_field`/`store_field` or `load_index`/`store_index` when the key slot's type is known.
|
||||
|
||||
At label join points, all type information is reset except for parameter types seeded by the backward inference pass.
|
||||
At label join points, all type information is reset except for parameter types from backward inference and write-invariant types from slot write-type analysis.
|
||||
|
||||
**Nop prefix:** `_nop_tc_`
|
||||
|
||||
### 3. simplify_algebra (algebraic identity + comparison folding)
|
||||
### 4. simplify_algebra (same-slot comparison folding)
|
||||
|
||||
Tracks known constant values alongside types. Rewrites identity operations:
|
||||
|
||||
| Pattern | Rewrite |
|
||||
|---------|---------|
|
||||
| `add_int dest, x, 0` | `move dest, x` |
|
||||
| `add_int dest, 0, x` | `move dest, x` |
|
||||
| `sub_int dest, x, 0` | `move dest, x` |
|
||||
| `mul_int dest, x, 1` | `move dest, x` |
|
||||
| `mul_int dest, 1, x` | `move dest, x` |
|
||||
| `mul_int dest, x, 0` | `int dest, 0` |
|
||||
| `div_int dest, x, 1` | `move dest, x` |
|
||||
| `add_float dest, x, 0` | `move dest, x` |
|
||||
| `mul_float dest, x, 1` | `move dest, x` |
|
||||
| `div_float dest, x, 1` | `move dest, x` |
|
||||
|
||||
Float multiplication by zero is intentionally not optimized because it is not safe with NaN and Inf values.
|
||||
|
||||
Same-slot comparison folding:
|
||||
Tracks known constant values. Folds same-slot comparisons:
|
||||
|
||||
| Pattern | Rewrite |
|
||||
|---------|---------|
|
||||
@@ -107,7 +137,7 @@ Same-slot comparison folding:
|
||||
|
||||
**Nop prefix:** none (rewrites in place, does not create nops)
|
||||
|
||||
### 4. simplify_booleans (not + jump fusion)
|
||||
### 5. simplify_booleans (not + jump fusion)
|
||||
|
||||
Peephole pass that eliminates unnecessary `not` instructions:
|
||||
|
||||
@@ -121,21 +151,21 @@ This is particularly effective on `if (!cond)` patterns, which the compiler gene
|
||||
|
||||
**Nop prefix:** `_nop_bl_`
|
||||
|
||||
### 5. eliminate_moves (self-move elimination)
|
||||
### 6. eliminate_moves (self-move elimination)
|
||||
|
||||
Removes `move a, a` instructions where the source and destination are the same slot. These can arise from earlier passes rewriting binary operations into moves.
|
||||
|
||||
**Nop prefix:** `_nop_mv_`
|
||||
|
||||
### 6. eliminate_unreachable (dead code after return/disrupt)
|
||||
### 7. eliminate_unreachable (dead code after return)
|
||||
|
||||
*Currently disabled.* Nops instructions after `return` or `disrupt` until the next real label.
|
||||
Nops instructions after `return` until the next real label. Only `return` is treated as a terminal instruction; `disrupt` is not, because the disruption handler code immediately follows `disrupt` and must remain reachable.
|
||||
|
||||
Disabled because disruption handler code is placed after the `return`/`disrupt` instruction without a label boundary. The VM dispatches to handlers via the `disruption_pc` offset, not through normal control flow. Re-enabling this pass requires the mcode compiler to emit labels at disruption handler entry points.
|
||||
The mcode compiler emits a label at disruption handler entry points (see `emit_label(gen_label("disruption"))` in mcode.cm), which provides the label boundary that stops this pass from eliminating handler code.
|
||||
|
||||
**Nop prefix:** `_nop_ur_`
|
||||
|
||||
### 7. eliminate_dead_jumps (jump-to-next-label elimination)
|
||||
### 8. eliminate_dead_jumps (jump-to-next-label elimination)
|
||||
|
||||
Removes `jump L` instructions where `L` is the immediately following label (skipping over any intervening nop strings). These are common after other passes eliminate conditional branches, leaving behind jumps that fall through naturally.
|
||||
|
||||
@@ -146,12 +176,13 @@ Removes `jump L` instructions where `L` is the immediately following label (skip
|
||||
All passes run in sequence in `optimize_function`:
|
||||
|
||||
```
|
||||
infer_param_types → returns param_types map
|
||||
eliminate_type_checks → uses param_types
|
||||
infer_param_types → returns param_types map
|
||||
infer_slot_write_types → returns write_types map
|
||||
eliminate_type_checks → uses param_types + write_types
|
||||
simplify_algebra
|
||||
simplify_booleans
|
||||
eliminate_moves
|
||||
(eliminate_unreachable) → disabled
|
||||
eliminate_unreachable
|
||||
eliminate_dead_jumps
|
||||
```
|
||||
|
||||
@@ -177,6 +208,16 @@ Before streamlining, `mcode.cm` recognizes calls to built-in intrinsic functions
|
||||
|
||||
These inlined opcodes have corresponding Mach VM implementations in `mach.c`.
|
||||
|
||||
## Unified Arithmetic
|
||||
|
||||
Arithmetic operations use generic opcodes: `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate`. There are no type-dispatched variants (e.g., no `add_int`/`add_float`).
|
||||
|
||||
The Mach VM dispatches at runtime with an int-first fast path via `reg_vm_binop()`: it checks `JS_VALUE_IS_BOTH_INT` first for fast integer arithmetic, then falls back to float conversion, text concatenation (for `add` only), or type error.
|
||||
|
||||
Bitwise operations (`shl`, `shr`, `ushr`, `bitand`, `bitor`, `bitxor`, `bitnot`) remain integer-only and disrupt if operands are not integers.
|
||||
|
||||
The QBE/native backend maps generic arithmetic to helper calls (`qbe.add`, `qbe.sub`, etc.). The vision for the native path is that with sufficient type inference, the backend can unbox proven-numeric values to raw registers, operate directly, and only rebox at boundaries (returns, calls, stores).
|
||||
|
||||
## Debugging Tools
|
||||
|
||||
Three dump tools inspect the IR at different stages:
|
||||
@@ -192,6 +233,124 @@ Usage:
|
||||
./cell --core . dump_types.cm <file.ce|file.cm>
|
||||
```
|
||||
|
||||
## Tail Call Marking
|
||||
|
||||
When a function's return expression is a call (`stmt.tail == true` from the parser) and the function has no disruption handler, mcode.cm renames the final `invoke` instruction to `tail_invoke`. This is semantically identical to `invoke` in the current Mach VM, but marks the call site for future tail call optimization.
|
||||
|
||||
The disruption handler restriction exists because TCO would discard the current frame, but the handler must remain on the stack to catch disruptions from the callee.
|
||||
|
||||
`tail_invoke` is handled by the same passes as `invoke` in streamline (type tracking, algebraic simplification) and executes identically in the VM.
|
||||
|
||||
## Type Propagation Architecture
|
||||
|
||||
Type information flows through three compilation stages, each building on the previous:
|
||||
|
||||
### Stage 1: Parse-time type tags (parse.cm)
|
||||
|
||||
The parser assigns `type_tag` strings to scope variable entries when the type is syntactically obvious:
|
||||
|
||||
- **From initializers**: `def a = []` → `type_tag: "array"`, `def n = 42` → `type_tag: "integer"`, `def r = {}` → `type_tag: "record"`
|
||||
- **From usage patterns** (def only): `def x = null; x[] = v` infers `type_tag: "array"` from the push. `def x = null; x.foo = v` infers `type_tag: "record"` from property access.
|
||||
- **Type error detection** (def only): When a `def` variable has a known type_tag, provably wrong operations are compile errors:
|
||||
- Property access (`.`) on array
|
||||
- Push (`[]`) on non-array
|
||||
- Text key on array
|
||||
- Integer key on record
|
||||
|
||||
Only `def` (constant) variables participate in type inference and error detection. `var` variables can be reassigned, making their initializer type unreliable.
|
||||
|
||||
### Stage 2: Fold-time type propagation (fold.cm)
|
||||
|
||||
The fold pass extends type information through the AST:
|
||||
|
||||
- **Intrinsic folding**: `is_array(known_array)` folds to `true`. `length(known_array)` gets `hint: "array_length"`.
|
||||
- **Purity analysis**: Expressions involving only `is_*` intrinsic calls with pure arguments are considered pure. This enables dead code elimination for unused `var`/`def` bindings with pure initializers, and elimination of standalone pure call statements.
|
||||
- **Dead code**: Unused pure `var`/`def` declarations are removed. Standalone calls to pure intrinsics (where the result is discarded) are removed. Unreachable branches with constant conditions are removed.
|
||||
|
||||
The `pure_intrinsics` set currently contains only `is_*` sensory functions (`is_array`, `is_text`, `is_number`, `is_integer`, `is_function`, `is_logical`, `is_null`, `is_object`, `is_stone`). Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types, so they are excluded — removing a call that would disrupt changes observable behavior.
|
||||
|
||||
### Stage 3: Streamline-time type tracking (streamline.cm)
|
||||
|
||||
The streamline optimizer uses a numeric type lattice (`T_INT`, `T_FLOAT`, `T_TEXT`, etc.) for fine-grained per-instruction tracking:
|
||||
|
||||
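- **Backward inference** (pass 1): Scans typed operators and unambiguously numeric generic arithmetic (`subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate`) to infer parameter types. Since parameters are `def` (immutable), inferred types persist across label boundaries.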
|
||||
- **Write-type invariance** (pass 2): Scans all instructions to find local slots where every write produces the same type. These invariant types persist across label boundaries alongside parameter types.
|
||||
- **Forward tracking** (pass 3): `track_types` follows instruction execution order, tracking the type of each slot. Known-type operations set their destination type (e.g., `concat` → T_TEXT, `length` → T_INT). Generic arithmetic produces T_UNKNOWN. Type checks on unknown slots narrow the type on fallthrough.
|
||||
- **Type check elimination** (pass 3): When a slot's type is already known, `is_<type>` + conditional jump pairs are eliminated or converted to unconditional jumps.
|
||||
- **Dynamic access narrowing** (pass 3): `load_dynamic`/`store_dynamic` are narrowed to `load_field`/`store_field` or `load_index`/`store_index` when the key type is known.
|
||||
|
||||
Type information resets at label join points (since control flow merges could bring different types), except for parameter types from backward inference and write-invariant types from slot write-type analysis.
|
||||
|
||||
## Future Work
|
||||
|
||||
### Copy Propagation
|
||||
|
||||
A basic-block-local copy propagation pass would replace uses of a copied variable with its source, enabling further move elimination. An implementation was attempted but encountered an unsolved bug where 2-position instruction operand replacement produces incorrect code during self-hosting (the replacement logic for 3-position instructions works correctly). The root cause is not yet understood. See the project memory files for detailed notes.
|
||||
|
||||
### Expanded Purity Analysis
|
||||
|
||||
The current purity set is conservative (only `is_*`). It could be expanded by:
|
||||
|
||||
- **Argument-type-aware purity**: If all arguments to an intrinsic are known to be the correct types (via type_tag or slot_types), the call cannot disrupt and is safe to eliminate. For example, `length(known_array)` is pure but `length(unknown)` is not.
|
||||
- **User function purity**: Analyze user-defined function bodies during pre_scan. A function is pure if its body contains only pure expressions and calls to known-pure functions. This requires fixpoint iteration for mutual recursion.
|
||||
- **Callback-aware purity**: Intrinsics like `filter`, `find`, `reduce`, `some`, `every` are pure if their callback argument is pure.
|
||||
|
||||
### Forward Type Narrowing from Typed Operations
|
||||
|
||||
With unified arithmetic (generic `add`/`subtract`/`multiply`/`divide`/`modulo`/`negate` instead of typed variants), forward narrowing of operand types from typed arithmetic operators is no longer applicable, because those typed variants no longer exist. Typed comparisons (`eq_int`, `lt_float`, etc.) still exist and their operands have known types, but these are already handled by backward inference.
|
||||
|
||||
### Guard Hoisting for Parameters
|
||||
|
||||
When a type check on a parameter passes (falls through), the parameter's type could be promoted to `param_types` so it persists across label boundaries. This would allow the first type check on a parameter to prove its type for the entire function. However, this is unsound for polymorphic parameters — if a function is called with different argument types, the first check would wrongly eliminate checks for subsequent types.
|
||||
|
||||
A safe version would require proving that a parameter is monomorphic (called with only one type across all call sites), which requires interprocedural analysis.
|
||||
|
||||
**Note:** For local variables (non-parameters), the write-type invariance analysis (pass 2) achieves a similar effect safely — if every write to a slot produces the same type, that type persists across labels without needing to hoist any guard.
|
||||
|
||||
### Tail Call Optimization
|
||||
|
||||
`tail_invoke` instructions are currently marked but execute identically to `invoke`. Actual TCO would reuse the current call frame instead of creating a new one. This requires:
|
||||
|
||||
- Ensuring argument count matches (or the frame can be resized)
|
||||
- No live locals needed after the call (guaranteed by tail position)
|
||||
- No disruption handler on the current function (already enforced by the marking)
|
||||
- VM support in mach.c to rewrite the frame in place
|
||||
|
||||
### Interprocedural Type Inference
|
||||
|
||||
Currently all type inference is intraprocedural (within a single function). Cross-function analysis could:
|
||||
|
||||
- Infer return types from function bodies
|
||||
- Propagate argument types from call sites to callees
|
||||
- Specialize functions for known argument types (cloning)
|
||||
|
||||
### Strength Reduction
|
||||
|
||||
Common patterns that could be lowered to cheaper operations when operand types are known:
|
||||
|
||||
- `multiply x, 2` with proven-int operands → shift left
|
||||
- `divide x, 2` with proven-int → arithmetic shift right
|
||||
- `modulo x, power_of_2` with proven-int → bitwise and
|
||||
|
||||
### Numeric Unboxing (QBE/native path)
|
||||
|
||||
With unified arithmetic and backward type inference, the native backend can identify regions where numeric values remain in registers without boxing/unboxing:
|
||||
|
||||
1. **Guard once**: When backward inference proves a parameter is T_NUM, emit a single type guard at function entry.
|
||||
2. **Unbox**: Convert the tagged JSValue to a raw double register.
|
||||
3. **Operate**: Use native FP/int instructions directly (no function calls, no tag checks).
|
||||
4. **Rebox**: Convert back to tagged JSValue only at rebox points (function returns, calls, stores to arrays/records).
|
||||
|
||||
This requires inserting `unbox`/`rebox` IR annotations (no-ops in the Mach VM, meaningful only to QBE).
|
||||
|
||||
### Loop-Invariant Code Motion
|
||||
|
||||
Type checks that are invariant across loop iterations (checking a variable that doesn't change in the loop body) could be hoisted above the loop. This would require identifying loop boundaries and proving invariance.
|
||||
|
||||
### Algebraic Identity Optimization
|
||||
|
||||
With unified arithmetic, algebraic identities (x+0→x, x*1→x, x*0→0, x/1→x) require knowing operand values at compile time. Since generic `add`/`multiply` operate on any numeric type, the constant-tracking logic in `simplify_algebra` could be extended to handle these for known-constant slots.
|
||||
|
||||
## Nop Convention
|
||||
|
||||
Eliminated instructions are replaced with strings matching `_nop_<prefix>_<counter>`. The prefix identifies which pass created the nop. Nop strings are:
|
||||
|
||||
@@ -36,15 +36,9 @@ def T_RECORD = "record"
|
||||
def T_FUNCTION = "function"
|
||||
|
||||
def int_result_ops = {
|
||||
add_int: true, sub_int: true, mul_int: true,
|
||||
div_int: true, mod_int: true, neg_int: true,
|
||||
bitnot: true, bitand: true, bitor: true,
|
||||
bitxor: true, shl: true, shr: true, ushr: true
|
||||
}
|
||||
def float_result_ops = {
|
||||
add_float: true, sub_float: true, mul_float: true,
|
||||
div_float: true, mod_float: true, neg_float: true
|
||||
}
|
||||
def bool_result_ops = {
|
||||
eq_int: true, ne_int: true, lt_int: true, gt_int: true,
|
||||
le_int: true, ge_int: true,
|
||||
@@ -85,8 +79,6 @@ var track_types = function(slot_types, instr) {
|
||||
slot_types[text(instr[1])] = src_type != null ? src_type : T_UNKNOWN
|
||||
} else if (int_result_ops[op] == true) {
|
||||
slot_types[text(instr[1])] = T_INT
|
||||
} else if (float_result_ops[op] == true) {
|
||||
slot_types[text(instr[1])] = T_FLOAT
|
||||
} else if (op == "concat") {
|
||||
slot_types[text(instr[1])] = T_TEXT
|
||||
} else if (bool_result_ops[op] == true) {
|
||||
@@ -99,7 +91,7 @@ var track_types = function(slot_types, instr) {
|
||||
slot_types[text(instr[1])] = T_RECORD
|
||||
} else if (op == "function") {
|
||||
slot_types[text(instr[1])] = T_FUNCTION
|
||||
} else if (op == "invoke") {
|
||||
} else if (op == "invoke" || op == "tail_invoke") {
|
||||
slot_types[text(instr[2])] = T_UNKNOWN
|
||||
} else if (op == "load_field" || op == "load_index" || op == "load_dynamic") {
|
||||
slot_types[text(instr[1])] = T_UNKNOWN
|
||||
@@ -107,6 +99,9 @@ var track_types = function(slot_types, instr) {
|
||||
slot_types[text(instr[1])] = T_UNKNOWN
|
||||
} else if (op == "length") {
|
||||
slot_types[text(instr[1])] = T_INT
|
||||
} else if (op == "add" || op == "subtract" || op == "multiply" ||
|
||||
op == "divide" || op == "modulo" || op == "pow" || op == "negate") {
|
||||
slot_types[text(instr[1])] = T_UNKNOWN
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
2
fd.cm
2
fd.cm
@@ -97,4 +97,4 @@ fd.globfs = function(globs, dir) {
|
||||
return results
|
||||
}
|
||||
|
||||
return fd
|
||||
return fd
|
||||
|
||||
23
fold.cm
23
fold.cm
@@ -15,10 +15,18 @@ var fold = function(ast) {
|
||||
return k == "number" || k == "text" || k == "true" || k == "false" || k == "null"
|
||||
}
|
||||
|
||||
// Only intrinsics that can NEVER disrupt regardless of argument types
|
||||
var pure_intrinsics = {
|
||||
is_array: true, is_text: true, is_number: true, is_integer: true,
|
||||
is_function: true, is_logical: true, is_null: true, is_object: true,
|
||||
is_stone: true
|
||||
}
|
||||
|
||||
var is_pure = function(expr) {
|
||||
if (expr == null) return true
|
||||
var k = expr.kind
|
||||
var i = 0
|
||||
var target = null
|
||||
if (k == "number" || k == "text" || k == "true" || k == "false" ||
|
||||
k == "null" || k == "name" || k == "this") return true
|
||||
if (k == "function") return true
|
||||
@@ -47,6 +55,17 @@ var fold = function(ast) {
|
||||
if (k == "==" || k == "!=" || k == "&&" || k == "||") {
|
||||
return is_pure(expr.left) && is_pure(expr.right)
|
||||
}
|
||||
if (k == "(") {
|
||||
target = expr.expression
|
||||
if (target != null && target.intrinsic == true && pure_intrinsics[target.name] == true) {
|
||||
i = 0
|
||||
while (i < length(expr.list)) {
|
||||
if (!is_pure(expr.list[i])) return false
|
||||
i = i + 1
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -676,6 +695,10 @@ var fold = function(ast) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Dead pure call elimination: standalone pure calls with no result
|
||||
if (stmt.kind == "call" && is_pure(stmt.expression)) {
|
||||
stmt.dead = true
|
||||
}
|
||||
// Dead function elimination
|
||||
if (stmt.kind == "function" && stmt.name != null) {
|
||||
sv = scope_var(fn_nr, stmt.name)
|
||||
|
||||
230
mcode.cm
230
mcode.cm
@@ -52,6 +52,7 @@ var mcode = function(ast) {
|
||||
var s_cur_line = 0
|
||||
var s_cur_col = 0
|
||||
var s_filename = null
|
||||
var s_has_disruption = false
|
||||
|
||||
// Shared closure vars for binop helpers (avoids >4 param functions)
|
||||
var _bp_dest = 0
|
||||
@@ -78,7 +79,8 @@ var mcode = function(ast) {
|
||||
function_nr: s_function_nr,
|
||||
intrinsic_cache: s_intrinsic_cache,
|
||||
cur_line: s_cur_line,
|
||||
cur_col: s_cur_col
|
||||
cur_col: s_cur_col,
|
||||
has_disruption: s_has_disruption
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,6 +101,7 @@ var mcode = function(ast) {
|
||||
s_intrinsic_cache = saved.intrinsic_cache
|
||||
s_cur_line = saved.cur_line
|
||||
s_cur_col = saved.cur_col
|
||||
s_has_disruption = saved.has_disruption
|
||||
}
|
||||
|
||||
// Slot allocation
|
||||
@@ -270,157 +273,19 @@ var mcode = function(ast) {
|
||||
return node.kind == "null"
|
||||
}
|
||||
|
||||
// emit_add_decomposed: int path -> text path -> float path -> disrupt
|
||||
// emit_add_decomposed: emit generic add (VM dispatches int/float/text)
|
||||
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
|
||||
var emit_add_decomposed = function() {
|
||||
var dest = _bp_dest
|
||||
var left = _bp_left
|
||||
var right = _bp_right
|
||||
var t0 = 0
|
||||
var t1 = 0
|
||||
var left_is_int = is_known_int(_bp_ln)
|
||||
var left_is_text = is_known_text(_bp_ln)
|
||||
var left_is_num = is_known_number(_bp_ln)
|
||||
var right_is_int = is_known_int(_bp_rn)
|
||||
var right_is_text = is_known_text(_bp_rn)
|
||||
var right_is_num = is_known_number(_bp_rn)
|
||||
var not_int = null
|
||||
var not_text = null
|
||||
var done = null
|
||||
var err = null
|
||||
|
||||
// Both sides known int
|
||||
if (left_is_int && right_is_int) {
|
||||
emit_3("add_int", dest, left, right)
|
||||
// Known text+text → concat directly (skip numeric check in VM)
|
||||
if (is_known_text(_bp_ln) && is_known_text(_bp_rn)) {
|
||||
emit_3("concat", _bp_dest, _bp_left, _bp_right)
|
||||
return null
|
||||
}
|
||||
// Both sides known text
|
||||
if (left_is_text && right_is_text) {
|
||||
emit_3("concat", dest, left, right)
|
||||
return null
|
||||
}
|
||||
// Both sides known number (but not both int)
|
||||
if (left_is_num && right_is_num) {
|
||||
if (left_is_int && right_is_int) {
|
||||
emit_3("add_int", dest, left, right)
|
||||
} else {
|
||||
emit_3("add_float", dest, left, right)
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
not_int = gen_label("add_ni")
|
||||
not_text = gen_label("add_nt")
|
||||
done = gen_label("add_done")
|
||||
err = gen_label("add_err")
|
||||
|
||||
// Int path
|
||||
t0 = alloc_slot()
|
||||
if (!left_is_int) {
|
||||
emit_2("is_int", t0, left)
|
||||
emit_jump_cond("jump_false", t0, not_int)
|
||||
}
|
||||
t1 = alloc_slot()
|
||||
if (!right_is_int) {
|
||||
emit_2("is_int", t1, right)
|
||||
emit_jump_cond("jump_false", t1, not_int)
|
||||
}
|
||||
emit_3("add_int", dest, left, right)
|
||||
emit_jump(done)
|
||||
|
||||
// Text path
|
||||
emit_label(not_int)
|
||||
if (!left_is_text) {
|
||||
emit_2("is_text", t0, left)
|
||||
emit_jump_cond("jump_false", t0, not_text)
|
||||
}
|
||||
if (!right_is_text) {
|
||||
emit_2("is_text", t1, right)
|
||||
emit_jump_cond("jump_false", t1, not_text)
|
||||
}
|
||||
emit_3("concat", dest, left, right)
|
||||
emit_jump(done)
|
||||
|
||||
// Float path
|
||||
emit_label(not_text)
|
||||
if (!left_is_num) {
|
||||
emit_2("is_num", t0, left)
|
||||
emit_jump_cond("jump_false", t0, err)
|
||||
}
|
||||
if (!right_is_num) {
|
||||
emit_2("is_num", t1, right)
|
||||
emit_jump_cond("jump_false", t1, err)
|
||||
}
|
||||
emit_3("add_float", dest, left, right)
|
||||
emit_jump(done)
|
||||
|
||||
emit_label(err)
|
||||
emit_0("disrupt")
|
||||
emit_label(done)
|
||||
emit_3("add", _bp_dest, _bp_left, _bp_right)
|
||||
return null
|
||||
}
|
||||
|
||||
// emit_numeric_binop: int path -> float path -> disrupt
|
||||
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
|
||||
var emit_numeric_binop = function(int_op, float_op) {
|
||||
var dest = _bp_dest
|
||||
var left = _bp_left
|
||||
var right = _bp_right
|
||||
var t0 = 0
|
||||
var t1 = 0
|
||||
var left_is_int = is_known_int(_bp_ln)
|
||||
var left_is_num = is_known_number(_bp_ln)
|
||||
var right_is_int = is_known_int(_bp_rn)
|
||||
var right_is_num = is_known_number(_bp_rn)
|
||||
var not_int = null
|
||||
var done = null
|
||||
var err = null
|
||||
|
||||
// Both sides known int
|
||||
if (left_is_int && right_is_int) {
|
||||
emit_3(int_op, dest, left, right)
|
||||
return null
|
||||
}
|
||||
// Both sides known number (but not both int)
|
||||
if (left_is_num && right_is_num) {
|
||||
emit_3(float_op, dest, left, right)
|
||||
return null
|
||||
}
|
||||
|
||||
not_int = gen_label("num_ni")
|
||||
done = gen_label("num_done")
|
||||
err = gen_label("num_err")
|
||||
|
||||
t0 = alloc_slot()
|
||||
if (!left_is_int) {
|
||||
emit_2("is_int", t0, left)
|
||||
emit_jump_cond("jump_false", t0, not_int)
|
||||
}
|
||||
t1 = alloc_slot()
|
||||
if (!right_is_int) {
|
||||
emit_2("is_int", t1, right)
|
||||
emit_jump_cond("jump_false", t1, not_int)
|
||||
}
|
||||
emit_3(int_op, dest, left, right)
|
||||
emit_jump(done)
|
||||
|
||||
emit_label(not_int)
|
||||
if (!left_is_num) {
|
||||
emit_2("is_num", t0, left)
|
||||
emit_jump_cond("jump_false", t0, err)
|
||||
}
|
||||
if (!right_is_num) {
|
||||
emit_2("is_num", t1, right)
|
||||
emit_jump_cond("jump_false", t1, err)
|
||||
}
|
||||
emit_3(float_op, dest, left, right)
|
||||
emit_jump(done)
|
||||
|
||||
emit_label(err)
|
||||
emit_0("disrupt")
|
||||
emit_label(done)
|
||||
return null
|
||||
}
|
||||
// emit_numeric_binop removed — generic ops emitted directly via passthrough
|
||||
|
||||
// emit_eq_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(false)
|
||||
// reads _bp_dest, _bp_left, _bp_right from closure
|
||||
@@ -646,41 +511,9 @@ var mcode = function(ast) {
|
||||
return null
|
||||
}
|
||||
|
||||
// emit_neg_decomposed: int path -> float path -> disrupt
|
||||
// emit_neg_decomposed: emit generic negate (VM dispatches int/float)
|
||||
var emit_neg_decomposed = function(dest, src, src_node) {
|
||||
var t0 = 0
|
||||
var not_int = null
|
||||
var done = null
|
||||
var err = null
|
||||
|
||||
if (is_known_int(src_node)) {
|
||||
emit_2("neg_int", dest, src)
|
||||
return null
|
||||
}
|
||||
if (is_known_number(src_node)) {
|
||||
emit_2("neg_float", dest, src)
|
||||
return null
|
||||
}
|
||||
|
||||
not_int = gen_label("neg_ni")
|
||||
done = gen_label("neg_done")
|
||||
err = gen_label("neg_err")
|
||||
|
||||
t0 = alloc_slot()
|
||||
emit_2("is_int", t0, src)
|
||||
emit_jump_cond("jump_false", t0, not_int)
|
||||
emit_2("neg_int", dest, src)
|
||||
emit_jump(done)
|
||||
|
||||
emit_label(not_int)
|
||||
emit_2("is_num", t0, src)
|
||||
emit_jump_cond("jump_false", t0, err)
|
||||
emit_2("neg_float", dest, src)
|
||||
emit_jump(done)
|
||||
|
||||
emit_label(err)
|
||||
emit_0("disrupt")
|
||||
emit_label(done)
|
||||
emit_2("negate", dest, src)
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -692,14 +525,6 @@ var mcode = function(ast) {
|
||||
_bp_right = right
|
||||
if (op_str == "add") {
|
||||
emit_add_decomposed()
|
||||
} else if (op_str == "subtract") {
|
||||
emit_numeric_binop("sub_int", "sub_float")
|
||||
} else if (op_str == "multiply") {
|
||||
emit_numeric_binop("mul_int", "mul_float")
|
||||
} else if (op_str == "divide") {
|
||||
emit_numeric_binop("div_int", "div_float")
|
||||
} else if (op_str == "modulo") {
|
||||
emit_numeric_binop("mod_int", "mod_float")
|
||||
} else if (op_str == "eq") {
|
||||
emit_eq_decomposed()
|
||||
} else if (op_str == "ne") {
|
||||
@@ -713,7 +538,8 @@ var mcode = function(ast) {
|
||||
} else if (op_str == "ge") {
|
||||
emit_relational("ge_int", "ge_float", "ge_text")
|
||||
} else {
|
||||
// Passthrough for bitwise, pow, in, etc.
|
||||
// Passthrough for subtract, multiply, divide, modulo,
|
||||
// bitwise, pow, in, etc.
|
||||
emit_3(op_str, dest, left, right)
|
||||
}
|
||||
return null
|
||||
@@ -1024,9 +850,9 @@ var mcode = function(ast) {
|
||||
emit_3("setarg", f, 2, item)
|
||||
emit_2("invoke", f, acc)
|
||||
if (forward) {
|
||||
emit_3("add_int", i, i, one)
|
||||
emit_3("add", i, i, one)
|
||||
} else {
|
||||
emit_3("sub_int", i, i, one)
|
||||
emit_3("subtract", i, i, one)
|
||||
}
|
||||
emit_jump(loop_label)
|
||||
}
|
||||
@@ -1056,7 +882,7 @@ var mcode = function(ast) {
|
||||
emit_3("setarg", f, 1, item)
|
||||
emit_3("setarg", f, 2, i)
|
||||
emit_2("invoke", f, discard)
|
||||
emit_3("add_int", i, i, one)
|
||||
emit_3("add", i, i, one)
|
||||
emit_jump(loop_label)
|
||||
emit_label(done_label)
|
||||
emit_1("null", dest)
|
||||
@@ -1090,7 +916,7 @@ var mcode = function(ast) {
|
||||
emit_3("setarg", f, 1, item)
|
||||
emit_2("invoke", f, val)
|
||||
emit_jump_cond("jump_false", val, ret_false)
|
||||
emit_3("add_int", i, i, one)
|
||||
emit_3("add", i, i, one)
|
||||
emit_jump(loop_label)
|
||||
emit_label(ret_true)
|
||||
emit_1("true", dest)
|
||||
@@ -1128,7 +954,7 @@ var mcode = function(ast) {
|
||||
emit_3("setarg", f, 1, item)
|
||||
emit_2("invoke", f, val)
|
||||
emit_jump_cond("jump_true", val, ret_true)
|
||||
emit_3("add_int", i, i, one)
|
||||
emit_3("add", i, i, one)
|
||||
emit_jump(loop_label)
|
||||
emit_label(ret_true)
|
||||
emit_1("true", dest)
|
||||
@@ -1170,7 +996,7 @@ var mcode = function(ast) {
|
||||
emit_jump_cond("jump_false", val, skip_label)
|
||||
emit_2("push", result, item)
|
||||
emit_label(skip_label)
|
||||
emit_3("add_int", i, i, one)
|
||||
emit_3("add", i, i, one)
|
||||
emit_jump(loop_label)
|
||||
emit_label(done_label)
|
||||
emit_2("move", dest, result)
|
||||
@@ -1269,9 +1095,9 @@ var mcode = function(ast) {
|
||||
emit_jump(final_label)
|
||||
// No initial, reverse
|
||||
emit_label(no_init_rev)
|
||||
emit_3("sub_int", i, len, one)
|
||||
emit_3("subtract", i, len, one)
|
||||
emit_3("load_index", acc, arr_slot, i)
|
||||
emit_3("sub_int", i, i, one)
|
||||
emit_3("subtract", i, i, one)
|
||||
emit_reduce_loop(r, false, d2)
|
||||
emit_label(d2)
|
||||
emit_2("move", dest, acc)
|
||||
@@ -1292,7 +1118,7 @@ var mcode = function(ast) {
|
||||
// Has initial, reverse
|
||||
emit_label(init_rev)
|
||||
emit_2("move", acc, init_slot)
|
||||
emit_3("sub_int", i, len, one)
|
||||
emit_3("subtract", i, len, one)
|
||||
emit_reduce_loop(r, false, d4)
|
||||
emit_label(d4)
|
||||
emit_2("move", dest, acc)
|
||||
@@ -2230,6 +2056,7 @@ var mcode = function(ast) {
|
||||
var guard_t = 0
|
||||
var guard_err = null
|
||||
var guard_done = null
|
||||
var last_instr = null
|
||||
|
||||
if (stmt == null) {
|
||||
return null
|
||||
@@ -2436,6 +2263,13 @@ var mcode = function(ast) {
|
||||
expr = stmt.expression
|
||||
if (expr != null) {
|
||||
slot = gen_expr(expr, -1)
|
||||
// Mark tail calls: rename last invoke to tail_invoke
|
||||
if (stmt.tail == true && !s_has_disruption) {
|
||||
last_instr = s_instructions[length(s_instructions) - 1]
|
||||
if (is_array(last_instr) && last_instr[0] == "invoke") {
|
||||
last_instr[0] = "tail_invoke"
|
||||
}
|
||||
}
|
||||
emit_1("return", slot)
|
||||
} else {
|
||||
null_slot = alloc_slot()
|
||||
@@ -2618,6 +2452,7 @@ var mcode = function(ast) {
|
||||
s_label_map = {}
|
||||
|
||||
s_is_arrow = is_arrow
|
||||
s_has_disruption = disrupt_clause != null && is_array(disrupt_clause)
|
||||
|
||||
s_function_nr = fn_nr_node != null ? fn_nr_node : 0
|
||||
|
||||
@@ -2726,6 +2561,7 @@ var mcode = function(ast) {
|
||||
|
||||
// Compile disruption clause
|
||||
if (disrupt_clause != null && is_array(disrupt_clause)) {
|
||||
emit_label(gen_label("disruption"))
|
||||
disruption_start = length(s_instructions)
|
||||
_i = 0
|
||||
while (_i < length(disrupt_clause)) {
|
||||
|
||||
42
parse.cm
42
parse.cm
@@ -1627,6 +1627,46 @@ var parse = function(tokens, src, filename, tokenizer) {
|
||||
if (kind == "[" && left_node.right != null) {
|
||||
sem_check_expr(scope, left_node.right)
|
||||
}
|
||||
// Type error detection for known-type constant objects
|
||||
if (obj_expr != null && obj_expr.kind == "name" && obj_expr.name != null) {
|
||||
v = sem_find_var(scope, obj_expr.name)
|
||||
if (v != null && v.is_const && v.type_tag != null) {
|
||||
if (kind == ".") {
|
||||
if (v.type_tag == "array") {
|
||||
sem_error(left_node, "cannot set property on array '" + obj_expr.name + "'")
|
||||
}
|
||||
} else if (kind == "[") {
|
||||
if (left_node.right == null) {
|
||||
// Push: a[] = val
|
||||
if (v.type_tag != "array") {
|
||||
sem_error(left_node, "push only works on arrays, not " + v.type_tag + " '" + obj_expr.name + "'")
|
||||
}
|
||||
} else if (v.type_tag == "array") {
|
||||
if (left_node.right.kind == "text") {
|
||||
sem_error(left_node, "cannot use text key on array '" + obj_expr.name + "'")
|
||||
}
|
||||
} else if (v.type_tag == "record") {
|
||||
if (left_node.right.kind == "number" && is_integer(left_node.right.number)) {
|
||||
sem_error(left_node, "cannot use integer key on record '" + obj_expr.name + "'; use text key")
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (v != null && v.is_const && v.type_tag == null) {
|
||||
// Infer type_tag from usage pattern (def only)
|
||||
if (kind == ".") {
|
||||
v.type_tag = "record"
|
||||
} else if (kind == "[") {
|
||||
if (left_node.right == null) {
|
||||
// Push: a[] = val → array
|
||||
v.type_tag = "array"
|
||||
} else if (left_node.right.kind == "number" && is_integer(left_node.right.number)) {
|
||||
v.type_tag = "array"
|
||||
} else if (left_node.right.kind == "text") {
|
||||
v.type_tag = "record"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1878,7 +1918,7 @@ var parse = function(tokens, src, filename, tokenizer) {
|
||||
sem_check_expr(scope, stmt.right)
|
||||
if (name != null) {
|
||||
tt = derive_type_tag(stmt.right)
|
||||
if (tt != null) {
|
||||
if (tt != null && tt != "null") {
|
||||
existing = sem_find_var(scope, name)
|
||||
if (existing != null) existing.type_tag = tt
|
||||
}
|
||||
|
||||
175
qbe.cm
175
qbe.cm
@@ -732,167 +732,6 @@ var ushr = function(p, ctx, a, b) {
|
||||
// These map directly to the new IR ops emitted by mcode.cm.
|
||||
// ============================================================
|
||||
|
||||
// --- Arithmetic (int path) ---
|
||||
// add_int: assume both operands are tagged ints. Overflow -> float.
|
||||
var add_int = function(p, ctx, a, b) {
|
||||
return ` %${p}.ia =l sar ${a}, 1
|
||||
%${p}.ib =l sar ${b}, 1
|
||||
%${p}.sum =l add %${p}.ia, %${p}.ib
|
||||
%${p}.lo =w csltl %${p}.sum, ${int32_min}
|
||||
%${p}.hi =w csgtl %${p}.sum, ${int32_max}
|
||||
%${p}.ov =w or %${p}.lo, %${p}.hi
|
||||
jnz %${p}.ov, @${p}.ov, @${p}.ok
|
||||
@${p}.ok
|
||||
%${p}.rw =w copy %${p}.sum
|
||||
%${p}.rext =l extuw %${p}.rw
|
||||
%${p} =l shl %${p}.rext, 1
|
||||
jmp @${p}.done
|
||||
@${p}.ov
|
||||
%${p}.fd =d sltof %${p}.sum
|
||||
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
|
||||
@${p}.done
|
||||
`
|
||||
}
|
||||
|
||||
var sub_int = function(p, ctx, a, b) {
|
||||
return ` %${p}.ia =l sar ${a}, 1
|
||||
%${p}.ib =l sar ${b}, 1
|
||||
%${p}.diff =l sub %${p}.ia, %${p}.ib
|
||||
%${p}.lo =w csltl %${p}.diff, ${int32_min}
|
||||
%${p}.hi =w csgtl %${p}.diff, ${int32_max}
|
||||
%${p}.ov =w or %${p}.lo, %${p}.hi
|
||||
jnz %${p}.ov, @${p}.ov, @${p}.ok
|
||||
@${p}.ok
|
||||
%${p}.rw =w copy %${p}.diff
|
||||
%${p}.rext =l extuw %${p}.rw
|
||||
%${p} =l shl %${p}.rext, 1
|
||||
jmp @${p}.done
|
||||
@${p}.ov
|
||||
%${p}.fd =d sltof %${p}.diff
|
||||
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
|
||||
@${p}.done
|
||||
`
|
||||
}
|
||||
|
||||
var mul_int = function(p, ctx, a, b) {
|
||||
return ` %${p}.ia =l sar ${a}, 1
|
||||
%${p}.ib =l sar ${b}, 1
|
||||
%${p}.prod =l mul %${p}.ia, %${p}.ib
|
||||
%${p}.lo =w csltl %${p}.prod, ${int32_min}
|
||||
%${p}.hi =w csgtl %${p}.prod, ${int32_max}
|
||||
%${p}.ov =w or %${p}.lo, %${p}.hi
|
||||
jnz %${p}.ov, @${p}.ov, @${p}.ok
|
||||
@${p}.ok
|
||||
%${p}.rw =w copy %${p}.prod
|
||||
%${p}.rext =l extuw %${p}.rw
|
||||
%${p} =l shl %${p}.rext, 1
|
||||
jmp @${p}.done
|
||||
@${p}.ov
|
||||
%${p}.fd =d sltof %${p}.prod
|
||||
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
|
||||
@${p}.done
|
||||
`
|
||||
}
|
||||
|
||||
var div_int = function(p, ctx, a, b) {
|
||||
return ` %${p}.ia =w copy 0
|
||||
%${p}.tmp =l sar ${a}, 1
|
||||
%${p}.ia =w copy %${p}.tmp
|
||||
%${p}.ib =w copy 0
|
||||
%${p}.tmp2 =l sar ${b}, 1
|
||||
%${p}.ib =w copy %${p}.tmp2
|
||||
%${p}.div0 =w ceqw %${p}.ib, 0
|
||||
jnz %${p}.div0, @${p}.null, @${p}.chk
|
||||
@${p}.null
|
||||
%${p} =l copy ${js_null}
|
||||
jmp @${p}.done
|
||||
@${p}.chk
|
||||
%${p}.rem =w rem %${p}.ia, %${p}.ib
|
||||
%${p}.exact =w ceqw %${p}.rem, 0
|
||||
jnz %${p}.exact, @${p}.idiv, @${p}.fdiv
|
||||
@${p}.idiv
|
||||
%${p}.q =w div %${p}.ia, %${p}.ib
|
||||
%${p}.qext =l extuw %${p}.q
|
||||
%${p} =l shl %${p}.qext, 1
|
||||
jmp @${p}.done
|
||||
@${p}.fdiv
|
||||
%${p}.da =d swtof %${p}.ia
|
||||
%${p}.db =d swtof %${p}.ib
|
||||
%${p}.dr =d div %${p}.da, %${p}.db
|
||||
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.dr)
|
||||
@${p}.done
|
||||
`
|
||||
}
|
||||
|
||||
var mod_int = function(p, ctx, a, b) {
|
||||
return ` %${p}.ia =w copy 0
|
||||
%${p}.tmp =l sar ${a}, 1
|
||||
%${p}.ia =w copy %${p}.tmp
|
||||
%${p}.ib =w copy 0
|
||||
%${p}.tmp2 =l sar ${b}, 1
|
||||
%${p}.ib =w copy %${p}.tmp2
|
||||
%${p}.div0 =w ceqw %${p}.ib, 0
|
||||
jnz %${p}.div0, @${p}.null, @${p}.do_mod
|
||||
@${p}.null
|
||||
%${p} =l copy ${js_null}
|
||||
jmp @${p}.done
|
||||
@${p}.do_mod
|
||||
%${p}.r =w rem %${p}.ia, %${p}.ib
|
||||
%${p}.rext =l extuw %${p}.r
|
||||
%${p} =l shl %${p}.rext, 1
|
||||
@${p}.done
|
||||
`
|
||||
}
|
||||
|
||||
var neg_int = function(p, ctx, v) {
|
||||
return ` %${p}.sl =l sar ${v}, 1
|
||||
%${p}.iw =w copy %${p}.sl
|
||||
%${p}.is_min =w ceqw %${p}.iw, ${int32_min}
|
||||
jnz %${p}.is_min, @${p}.ov, @${p}.ok
|
||||
@${p}.ov
|
||||
%${p}.fd =d swtof %${p}.iw
|
||||
%${p}.fdn =d neg %${p}.fd
|
||||
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fdn)
|
||||
jmp @${p}.done
|
||||
@${p}.ok
|
||||
%${p}.ni =w sub 0, %${p}.iw
|
||||
%${p}.niext =l extuw %${p}.ni
|
||||
%${p} =l shl %${p}.niext, 1
|
||||
@${p}.done
|
||||
`
|
||||
}
|
||||
|
||||
// --- Arithmetic (float path) ---
|
||||
var add_float = function(p, ctx, a, b) {
|
||||
return ` %${p} =l call $qbe_float_add(l ${ctx}, l ${a}, l ${b})
|
||||
`
|
||||
}
|
||||
|
||||
var sub_float = function(p, ctx, a, b) {
|
||||
return ` %${p} =l call $qbe_float_sub(l ${ctx}, l ${a}, l ${b})
|
||||
`
|
||||
}
|
||||
|
||||
var mul_float = function(p, ctx, a, b) {
|
||||
return ` %${p} =l call $qbe_float_mul(l ${ctx}, l ${a}, l ${b})
|
||||
`
|
||||
}
|
||||
|
||||
var div_float = function(p, ctx, a, b) {
|
||||
return ` %${p} =l call $qbe_float_div(l ${ctx}, l ${a}, l ${b})
|
||||
`
|
||||
}
|
||||
|
||||
var mod_float = function(p, ctx, a, b) {
|
||||
return ` %${p} =l call $qbe_float_mod(l ${ctx}, l ${a}, l ${b})
|
||||
`
|
||||
}
|
||||
|
||||
var neg_float = function(p, ctx, v) {
|
||||
return ` %${p} =l call $qbe_float_neg(l ${ctx}, l ${v})
|
||||
`
|
||||
}
|
||||
|
||||
// --- Text concat ---
|
||||
var concat = function(p, ctx, a, b) {
|
||||
return ` %${p} =l call $JS_ConcatString(l ${ctx}, l ${a}, l ${b})
|
||||
@@ -1039,20 +878,6 @@ return {
|
||||
shl: shl,
|
||||
shr: shr,
|
||||
ushr: ushr,
|
||||
// decomposed arithmetic (int path)
|
||||
add_int: add_int,
|
||||
sub_int: sub_int,
|
||||
mul_int: mul_int,
|
||||
div_int: div_int,
|
||||
mod_int: mod_int,
|
||||
neg_int: neg_int,
|
||||
// decomposed arithmetic (float path)
|
||||
add_float: add_float,
|
||||
sub_float: sub_float,
|
||||
mul_float: mul_float,
|
||||
div_float: div_float,
|
||||
mod_float: mod_float,
|
||||
neg_float: neg_float,
|
||||
// text concat
|
||||
concat: concat,
|
||||
// decomposed comparisons (int)
|
||||
|
||||
57
qbe_emit.cm
57
qbe_emit.cm
@@ -201,77 +201,46 @@ var qbe_emit = function(ir, qbe) {
|
||||
continue
|
||||
}
|
||||
|
||||
// --- Arithmetic (int path) — use qbe.cm macros ---
|
||||
// --- Generic arithmetic (VM dispatches int/float) ---
|
||||
|
||||
if (op == "add_int") {
|
||||
if (op == "add") {
|
||||
p = fresh()
|
||||
emit(qbe.add_int(p, "%ctx", s(a2), s(a3)))
|
||||
emit(qbe.add(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "sub_int") {
|
||||
if (op == "subtract") {
|
||||
p = fresh()
|
||||
emit(qbe.sub_int(p, "%ctx", s(a2), s(a3)))
|
||||
emit(qbe.sub(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "mul_int") {
|
||||
if (op == "multiply") {
|
||||
p = fresh()
|
||||
emit(qbe.mul_int(p, "%ctx", s(a2), s(a3)))
|
||||
emit(qbe.mul(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "div_int") {
|
||||
if (op == "divide") {
|
||||
p = fresh()
|
||||
emit(qbe.div_int(p, "%ctx", s(a2), s(a3)))
|
||||
emit(qbe.div(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "mod_int") {
|
||||
if (op == "modulo") {
|
||||
p = fresh()
|
||||
emit(qbe.mod_int(p, "%ctx", s(a2), s(a3)))
|
||||
emit(qbe.mod(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
|
||||
// --- Arithmetic (float path) ---
|
||||
|
||||
if (op == "add_float") {
|
||||
if (op == "negate") {
|
||||
p = fresh()
|
||||
emit(qbe.add_float(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "sub_float") {
|
||||
p = fresh()
|
||||
emit(qbe.sub_float(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "mul_float") {
|
||||
p = fresh()
|
||||
emit(qbe.mul_float(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "div_float") {
|
||||
p = fresh()
|
||||
emit(qbe.div_float(p, "%ctx", s(a2), s(a3)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
}
|
||||
if (op == "mod_float") {
|
||||
p = fresh()
|
||||
emit(qbe.mod_float(p, "%ctx", s(a2), s(a3)))
|
||||
emit(qbe.neg(p, "%ctx", s(a2)))
|
||||
emit(` ${s(a1)} =l copy %${p}`)
|
||||
wb(a1)
|
||||
continue
|
||||
|
||||
4
regen.ce
4
regen.ce
@@ -1,5 +1,4 @@
|
||||
// regen.ce — regenerate .mcode bytecode files and pre-warm .mach cache
|
||||
// Run with: ./cell --core . regen
|
||||
|
||||
var fd = use("fd")
|
||||
var json = use("json")
|
||||
@@ -20,7 +19,8 @@ var files = [
|
||||
{src: "qbe_emit.cm", name: "qbe_emit", out: "boot/qbe_emit.cm.mcode"},
|
||||
{src: "verify_ir.cm", name: "verify_ir", out: "boot/verify_ir.cm.mcode"},
|
||||
{src: "internal/bootstrap.cm", name: "bootstrap", out: "boot/bootstrap.cm.mcode"},
|
||||
{src: "internal/engine.cm", name: "engine", out: "boot/engine.cm.mcode"}
|
||||
{src: "internal/engine.cm", name: "engine", out: "boot/engine.cm.mcode"},
|
||||
{src: "boot/seed_bootstrap.cm", name: "seed_bootstrap", out: "boot/seed_bootstrap.cm.mcode"}
|
||||
]
|
||||
|
||||
// Resolve shop_path for cache writes
|
||||
|
||||
@@ -11,8 +11,9 @@
|
||||
#include "cell_internal.h"
|
||||
#include "cJSON.h"
|
||||
|
||||
#define BOOTSTRAP_MCODE "boot/bootstrap.cm.mcode"
|
||||
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
|
||||
#define BOOTSTRAP_MCODE "boot/bootstrap.cm.mcode"
|
||||
#define SEED_BOOTSTRAP_MCODE "boot/seed_bootstrap.cm.mcode"
|
||||
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
|
||||
#define CELL_SHOP_DIR ".cell"
|
||||
#define CELL_CORE_DIR "packages/core"
|
||||
|
||||
@@ -362,6 +363,7 @@ static void print_usage(const char *prog)
|
||||
printf(" --core <path> Set core path directly (overrides CELL_CORE)\n");
|
||||
printf(" --shop <path> Set shop path (overrides CELL_SHOP)\n");
|
||||
printf(" --dev Dev mode (shop=.cell, core=.)\n");
|
||||
printf(" --seed Use seed bootstrap (minimal, for regen)\n");
|
||||
printf(" --test [heap_size] Run C test suite\n");
|
||||
printf(" -h, --help Show this help message\n");
|
||||
printf("\nEnvironment:\n");
|
||||
@@ -394,6 +396,7 @@ int cell_init(int argc, char **argv)
|
||||
|
||||
/* Default: run script through bootstrap pipeline */
|
||||
int arg_start = 1;
|
||||
int seed_mode = 0;
|
||||
const char *shop_override = NULL;
|
||||
const char *core_override = NULL;
|
||||
|
||||
@@ -413,6 +416,9 @@ int cell_init(int argc, char **argv)
|
||||
}
|
||||
core_override = argv[arg_start + 1];
|
||||
arg_start += 2;
|
||||
} else if (strcmp(argv[arg_start], "--seed") == 0) {
|
||||
seed_mode = 1;
|
||||
arg_start++;
|
||||
} else if (strcmp(argv[arg_start], "--dev") == 0) {
|
||||
shop_override = ".cell";
|
||||
core_override = ".";
|
||||
@@ -438,8 +444,9 @@ int cell_init(int argc, char **argv)
|
||||
|
||||
actor_initialize();
|
||||
|
||||
const char *boot_mcode = seed_mode ? SEED_BOOTSTRAP_MCODE : BOOTSTRAP_MCODE;
|
||||
size_t boot_size;
|
||||
char *boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
|
||||
char *boot_data = load_core_file(boot_mcode, &boot_size);
|
||||
if (!boot_data) {
|
||||
printf("ERROR: Could not load bootstrap from %s\n", core_path);
|
||||
return 1;
|
||||
@@ -460,7 +467,7 @@ int cell_init(int argc, char **argv)
|
||||
free(bin_data);
|
||||
return 1;
|
||||
}
|
||||
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, 16 * 1024 * 1024);
|
||||
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, 1024 * 1024);
|
||||
if (!ctx) {
|
||||
printf("Failed to create JS context\n");
|
||||
free(bin_data); JS_FreeRuntime(g_runtime);
|
||||
|
||||
128
source/mach.c
128
source/mach.c
@@ -442,7 +442,7 @@ JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count) {
|
||||
if (!frame) return NULL;
|
||||
|
||||
/* cap56 = slot count (used by gc_object_size) */
|
||||
frame->hdr = objhdr_make(slot_count, OBJ_FRAME, 0, 0, 0, 0);
|
||||
frame->header = objhdr_make(slot_count, OBJ_FRAME, 0, 0, 0, 0);
|
||||
frame->function = JS_NULL;
|
||||
frame->caller = JS_NULL;
|
||||
frame->address = JS_NewInt32(ctx, 0);
|
||||
@@ -767,9 +767,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
/* Setup initial frame — wrap top-level code in a function object so that
|
||||
returning from a called register function can read code/env from frame */
|
||||
JSValue top_fn = js_new_register_function(ctx, code, env_gc.val, of_gc.val);
|
||||
JS_PopGCRef(ctx, &of_gc);
|
||||
env = env_gc.val; /* refresh — GC may have moved env during allocation */
|
||||
JS_PopGCRef(ctx, &env_gc);
|
||||
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
|
||||
frame->function = top_fn;
|
||||
frame->slots[0] = this_gc.val; /* slot 0 = this */
|
||||
@@ -780,6 +778,8 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
}
|
||||
for (int i = nargs_copy - 1; i >= 0; i--) JS_PopGCRef(ctx, &arg_gcs[i]);
|
||||
JS_PopGCRef(ctx, &this_gc);
|
||||
JS_PopGCRef(ctx, &of_gc);
|
||||
JS_PopGCRef(ctx, &env_gc);
|
||||
|
||||
uint32_t pc = code->entry_point;
|
||||
JSValue result = JS_NULL;
|
||||
@@ -1262,100 +1262,6 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
|
||||
/* === New mcode-derived opcodes === */
|
||||
|
||||
/* Typed integer arithmetic — inline with overflow to float */
|
||||
case MACH_ADD_INT: {
|
||||
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
|
||||
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
|
||||
int64_t r = (int64_t)ia + (int64_t)ib;
|
||||
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
|
||||
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
|
||||
break;
|
||||
}
|
||||
case MACH_SUB_INT: {
|
||||
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
|
||||
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
|
||||
int64_t r = (int64_t)ia - (int64_t)ib;
|
||||
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
|
||||
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
|
||||
break;
|
||||
}
|
||||
case MACH_MUL_INT: {
|
||||
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
|
||||
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
|
||||
int64_t r = (int64_t)ia * (int64_t)ib;
|
||||
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
|
||||
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
|
||||
break;
|
||||
}
|
||||
case MACH_DIV_INT: {
|
||||
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
|
||||
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
|
||||
if (ib == 0) { frame->slots[a] = JS_NULL; break; }
|
||||
if (ia % ib == 0) frame->slots[a] = JS_NewInt32(ctx, ia / ib);
|
||||
else frame->slots[a] = JS_NewFloat64(ctx, (double)ia / (double)ib);
|
||||
break;
|
||||
}
|
||||
case MACH_MOD_INT: {
|
||||
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
|
||||
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
|
||||
if (ib == 0) { frame->slots[a] = JS_NULL; break; }
|
||||
frame->slots[a] = JS_NewInt32(ctx, ia % ib);
|
||||
break;
|
||||
}
|
||||
case MACH_NEG_INT: {
|
||||
int32_t i = JS_VALUE_GET_INT(frame->slots[b]);
|
||||
if (i == INT32_MIN)
|
||||
frame->slots[a] = JS_NewFloat64(ctx, -(double)i);
|
||||
else
|
||||
frame->slots[a] = JS_NewInt32(ctx, -i);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Typed float arithmetic */
|
||||
case MACH_ADD_FLOAT: {
|
||||
double da, db;
|
||||
JS_ToFloat64(ctx, &da, frame->slots[b]);
|
||||
JS_ToFloat64(ctx, &db, frame->slots[c]);
|
||||
frame->slots[a] = JS_NewFloat64(ctx, da + db);
|
||||
break;
|
||||
}
|
||||
case MACH_SUB_FLOAT: {
|
||||
double da, db;
|
||||
JS_ToFloat64(ctx, &da, frame->slots[b]);
|
||||
JS_ToFloat64(ctx, &db, frame->slots[c]);
|
||||
frame->slots[a] = JS_NewFloat64(ctx, da - db);
|
||||
break;
|
||||
}
|
||||
case MACH_MUL_FLOAT: {
|
||||
double da, db;
|
||||
JS_ToFloat64(ctx, &da, frame->slots[b]);
|
||||
JS_ToFloat64(ctx, &db, frame->slots[c]);
|
||||
frame->slots[a] = JS_NewFloat64(ctx, da * db);
|
||||
break;
|
||||
}
|
||||
case MACH_DIV_FLOAT: {
|
||||
double da, db;
|
||||
JS_ToFloat64(ctx, &da, frame->slots[b]);
|
||||
JS_ToFloat64(ctx, &db, frame->slots[c]);
|
||||
if (db == 0.0) { frame->slots[a] = JS_NULL; break; }
|
||||
frame->slots[a] = JS_NewFloat64(ctx, da / db);
|
||||
break;
|
||||
}
|
||||
case MACH_MOD_FLOAT: {
|
||||
double da, db;
|
||||
JS_ToFloat64(ctx, &da, frame->slots[b]);
|
||||
JS_ToFloat64(ctx, &db, frame->slots[c]);
|
||||
if (db == 0.0) { frame->slots[a] = JS_NULL; break; }
|
||||
frame->slots[a] = JS_NewFloat64(ctx, fmod(da, db));
|
||||
break;
|
||||
}
|
||||
case MACH_NEG_FLOAT: {
|
||||
double d;
|
||||
JS_ToFloat64(ctx, &d, frame->slots[b]);
|
||||
frame->slots[a] = JS_NewFloat64(ctx, -d);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Text concatenation */
|
||||
case MACH_CONCAT: {
|
||||
JSValue res = JS_ConcatString(ctx, frame->slots[b], frame->slots[c]);
|
||||
@@ -1653,7 +1559,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
case MACH_INVOKE: {
|
||||
/* A=frame_slot, B=result_slot */
|
||||
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
|
||||
int nr = (int)objhdr_cap56(fr->hdr);
|
||||
int nr = (int)objhdr_cap56(fr->header);
|
||||
int c_argc = (nr >= 2) ? nr - 2 : 0;
|
||||
JSValue fn_val = fr->function;
|
||||
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
|
||||
@@ -1704,7 +1610,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
case MACH_GOINVOKE: {
|
||||
/* Tail call: replace current frame with callee */
|
||||
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
|
||||
int nr = (int)objhdr_cap56(fr->hdr);
|
||||
int nr = (int)objhdr_cap56(fr->header);
|
||||
int c_argc = (nr >= 2) ? nr - 2 : 0;
|
||||
JSValue fn_val = fr->function;
|
||||
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
|
||||
@@ -2022,7 +1928,7 @@ static int mcode_reg_items(cJSON *it, cJSON **out) {
|
||||
{ ADD(1); return c; }
|
||||
|
||||
/* invoke: [1]=frame, [2]=dest (result register) */
|
||||
if (!strcmp(op, "invoke")) { ADD(1); ADD(2); return c; }
|
||||
if (!strcmp(op, "invoke") || !strcmp(op, "tail_invoke")) { ADD(1); ADD(2); return c; }
|
||||
/* goinvoke: [1]=frame only (no result) */
|
||||
if (!strcmp(op, "goinvoke")) { ADD(1); return c; }
|
||||
|
||||
@@ -2375,24 +2281,16 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
|
||||
else if (strcmp(op, "false") == 0) { EM(MACH_ABC(MACH_LOADFALSE, A1, 0, 0)); }
|
||||
else if (strcmp(op, "null") == 0) { EM(MACH_ABC(MACH_LOADNULL, A1, 0, 0)); }
|
||||
else if (strcmp(op, "move") == 0) { AB2(MACH_MOVE); }
|
||||
/* Typed integer arithmetic */
|
||||
else if (strcmp(op, "add_int") == 0) { ABC3(MACH_ADD_INT); }
|
||||
else if (strcmp(op, "sub_int") == 0) { ABC3(MACH_SUB_INT); }
|
||||
else if (strcmp(op, "mul_int") == 0) { ABC3(MACH_MUL_INT); }
|
||||
else if (strcmp(op, "div_int") == 0) { ABC3(MACH_DIV_INT); }
|
||||
else if (strcmp(op, "mod_int") == 0) { ABC3(MACH_MOD_INT); }
|
||||
else if (strcmp(op, "neg_int") == 0) { AB2(MACH_NEG_INT); }
|
||||
/* Typed float arithmetic */
|
||||
else if (strcmp(op, "add_float") == 0) { ABC3(MACH_ADD_FLOAT); }
|
||||
else if (strcmp(op, "sub_float") == 0) { ABC3(MACH_SUB_FLOAT); }
|
||||
else if (strcmp(op, "mul_float") == 0) { ABC3(MACH_MUL_FLOAT); }
|
||||
else if (strcmp(op, "div_float") == 0) { ABC3(MACH_DIV_FLOAT); }
|
||||
else if (strcmp(op, "mod_float") == 0) { ABC3(MACH_MOD_FLOAT); }
|
||||
else if (strcmp(op, "neg_float") == 0) { AB2(MACH_NEG_FLOAT); }
|
||||
/* Text */
|
||||
else if (strcmp(op, "concat") == 0) { ABC3(MACH_CONCAT); }
|
||||
/* Generic arithmetic */
|
||||
else if (strcmp(op, "add") == 0) { ABC3(MACH_ADD); }
|
||||
else if (strcmp(op, "subtract") == 0) { ABC3(MACH_SUB); }
|
||||
else if (strcmp(op, "multiply") == 0) { ABC3(MACH_MUL); }
|
||||
else if (strcmp(op, "divide") == 0) { ABC3(MACH_DIV); }
|
||||
else if (strcmp(op, "modulo") == 0) { ABC3(MACH_MOD); }
|
||||
else if (strcmp(op, "pow") == 0) { ABC3(MACH_POW); }
|
||||
else if (strcmp(op, "negate") == 0) { AB2(MACH_NEG); }
|
||||
/* Typed integer comparisons */
|
||||
else if (strcmp(op, "eq_int") == 0) { ABC3(MACH_EQ_INT); }
|
||||
else if (strcmp(op, "ne_int") == 0) { ABC3(MACH_NE_INT); }
|
||||
@@ -2569,7 +2467,7 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
|
||||
else if (strcmp(op, "setarg") == 0) {
|
||||
EM(MACH_ABC(MACH_SETARG, A1, A2, A3));
|
||||
}
|
||||
else if (strcmp(op, "invoke") == 0) {
|
||||
else if (strcmp(op, "invoke") == 0 || strcmp(op, "tail_invoke") == 0) {
|
||||
EM(MACH_ABC(MACH_INVOKE, A1, A2, 0));
|
||||
}
|
||||
else if (strcmp(op, "goframe") == 0) {
|
||||
|
||||
@@ -356,7 +356,7 @@ void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) {
|
||||
|
||||
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
|
||||
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
|
||||
int nr_slots = (int)objhdr_cap56(fr->hdr);
|
||||
int nr_slots = (int)objhdr_cap56(fr->header);
|
||||
int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
|
||||
|
||||
/* Copy args to C stack */
|
||||
|
||||
@@ -115,6 +115,14 @@
|
||||
#define gc_poison_region(addr, size) ((void)0)
|
||||
#define gc_unpoison_region(addr, size) ((void)0)
|
||||
#endif
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* Round `size` up to the next multiple of the system page size.
 *
 * The page size is queried with sysconf(_SC_PAGESIZE). sysconf() reports
 * failure by returning -1; converting that straight to size_t would yield an
 * all-ones "page size" and a meaningless mask, so a non-positive result falls
 * back to a conventional 4 KiB page.
 *
 * The mask arithmetic assumes the page size is a power of two.
 * Returns 0 for size == 0. */
static inline size_t poison_page_align(size_t size) {
  long reported = sysconf(_SC_PAGESIZE);
  size_t ps = (reported > 0) ? (size_t)reported : (size_t)4096;
  return (size + ps - 1) & ~(ps - 1);
}
|
||||
#endif /* POISON_HEAP */
|
||||
|
||||
#ifdef HAVE_ASAN
|
||||
@@ -361,13 +369,7 @@ struct JSClass {
|
||||
#define JS_MODE_BACKTRACE_BARRIER \
|
||||
(1 << 3) /* stop backtrace before this frame */
|
||||
|
||||
typedef struct JSFrameRegister {
|
||||
objhdr_t hdr; // capacity in this is the total number of words of the object, including the 4 words of overhead and all slots
|
||||
JSValue function; // JSFunction, function object being invoked
|
||||
JSValue caller; // JSFrameRegister, the frame that called this one
|
||||
JSValue address; // address of the instruction in the code that should be executed upon return
|
||||
JSValue slots[]; // inline memory. order is [this][input args][closed over vars][non closed over vars][temporaries]
|
||||
} JSFrameRegister; /// extra note: when this frame returns, caller should be set to 0. If caller is found to be 0, then the GC can reduce this frame's slots down to [this][input_args][closed over vars]; if no closed over vars it can be totally removed; may happen naturally in GC since it would have no refs?
|
||||
/* JSFrameRegister is now an alias for JSFrame — see JSFrame definition below */
|
||||
|
||||
/* ============================================================
|
||||
Register-Based VM Data Structures
|
||||
@@ -554,22 +556,6 @@ typedef enum MachOpcode {
|
||||
|
||||
/* === New mcode-derived opcodes (1:1 mapping to mcode IR) === */
|
||||
|
||||
/* Typed integer arithmetic (ABC) */
|
||||
MACH_ADD_INT, /* R(A) = R(B) + R(C) — int, overflow → float */
|
||||
MACH_SUB_INT, /* R(A) = R(B) - R(C) — int */
|
||||
MACH_MUL_INT, /* R(A) = R(B) * R(C) — int */
|
||||
MACH_DIV_INT, /* R(A) = R(B) / R(C) — int */
|
||||
MACH_MOD_INT, /* R(A) = R(B) % R(C) — int */
|
||||
MACH_NEG_INT, /* R(A) = -R(B) — int (AB) */
|
||||
|
||||
/* Typed float arithmetic (ABC) */
|
||||
MACH_ADD_FLOAT, /* R(A) = R(B) + R(C) — float */
|
||||
MACH_SUB_FLOAT, /* R(A) = R(B) - R(C) — float */
|
||||
MACH_MUL_FLOAT, /* R(A) = R(B) * R(C) — float */
|
||||
MACH_DIV_FLOAT, /* R(A) = R(B) / R(C) — float */
|
||||
MACH_MOD_FLOAT, /* R(A) = R(B) % R(C) — float */
|
||||
MACH_NEG_FLOAT, /* R(A) = -R(B) — float (AB) */
|
||||
|
||||
/* Text */
|
||||
MACH_CONCAT, /* R(A) = R(B) ++ R(C) — string concatenation */
|
||||
|
||||
@@ -723,18 +709,6 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = {
|
||||
[MACH_NEQ_TOL] = "neq_tol",
|
||||
[MACH_NOP] = "nop",
|
||||
/* Mcode-derived */
|
||||
[MACH_ADD_INT] = "add_int",
|
||||
[MACH_SUB_INT] = "sub_int",
|
||||
[MACH_MUL_INT] = "mul_int",
|
||||
[MACH_DIV_INT] = "div_int",
|
||||
[MACH_MOD_INT] = "mod_int",
|
||||
[MACH_NEG_INT] = "neg_int",
|
||||
[MACH_ADD_FLOAT] = "add_float",
|
||||
[MACH_SUB_FLOAT] = "sub_float",
|
||||
[MACH_MUL_FLOAT] = "mul_float",
|
||||
[MACH_DIV_FLOAT] = "div_float",
|
||||
[MACH_MOD_FLOAT] = "mod_float",
|
||||
[MACH_NEG_FLOAT] = "neg_float",
|
||||
[MACH_CONCAT] = "concat",
|
||||
[MACH_EQ_INT] = "eq_int",
|
||||
[MACH_NE_INT] = "ne_int",
|
||||
@@ -823,17 +797,18 @@ typedef struct JSCodeRegister {
|
||||
} JSCodeRegister;
|
||||
|
||||
|
||||
/* Frame for closures - used by link-time relocation model where closures
|
||||
reference outer frames via (depth, slot) addressing.
|
||||
Stores function as JSValue to survive GC movements. */
|
||||
/* Unified frame struct — used by the register VM and closures.
|
||||
All fields are JSValues so the GC can scan them uniformly. */
|
||||
typedef struct JSFrame {
|
||||
objhdr_t header; /* OBJ_FRAME, cap56 = slot count */
|
||||
JSValue function; /* JSValue for GC safety (use JS_VALUE_GET_FUNCTION) */
|
||||
JSValue caller; /* JSValue for GC safety (unused currently) */
|
||||
uint32_t return_pc;
|
||||
JSValue function; /* JSFunction, function object being invoked */
|
||||
JSValue caller; /* JSFrame, the frame that called this one */
|
||||
JSValue address; /* return PC stored as JS_NewInt32 */
|
||||
JSValue slots[]; /* [this][args][captured][locals][temps] */
|
||||
} JSFrame;
|
||||
|
||||
typedef JSFrame JSFrameRegister;
|
||||
|
||||
static inline objhdr_t objhdr_set_s (objhdr_t h, bool s) {
|
||||
return s ? (h | OBJHDR_S_MASK) : (h & ~OBJHDR_S_MASK);
|
||||
}
|
||||
@@ -903,7 +878,8 @@ typedef struct JSArray {
|
||||
JSValue values[]; /* inline flexible array member */
|
||||
} JSArray;
|
||||
|
||||
/* JSBlob - binary data per memory.md */
|
||||
/* JSBlob — not allocated on GC heap (blobs use JSRecord + opaque).
|
||||
Struct kept for reference; gc_object_size/gc_scan_object do not handle OBJ_BLOB. */
|
||||
typedef struct JSBlob {
|
||||
objhdr_t mist_hdr;
|
||||
word_t length;
|
||||
@@ -926,7 +902,7 @@ typedef slot JSRecordEntry;
|
||||
|
||||
typedef struct JSRecord {
|
||||
objhdr_t mist_hdr;
|
||||
struct JSRecord *proto;
|
||||
JSValue proto; /* prototype as JSValue (JS_NULL if none) */
|
||||
word_t len; /* number of entries */
|
||||
slot slots[]; /* slots[0] reserved: key low32=class_id, key high32=rec_id, val=opaque */
|
||||
} JSRecord;
|
||||
@@ -1071,6 +1047,13 @@ static JS_BOOL JSText_equal_ascii (const JSText *text, JSValue imm) {
|
||||
enough to call the interrupt callback often. */
|
||||
#define JS_INTERRUPT_COUNTER_INIT 10000
|
||||
|
||||
/* Auto-rooted C call argv — GC updates values in-place */
|
||||
typedef struct CCallRoot {
|
||||
JSValue *argv; /* points to C-stack-local array */
|
||||
int argc;
|
||||
struct CCallRoot *prev; /* stack for nesting (C -> JS -> C -> ...) */
|
||||
} CCallRoot;
|
||||
|
||||
struct JSContext {
|
||||
JSRuntime *rt;
|
||||
|
||||
@@ -1081,18 +1064,18 @@ struct JSContext {
|
||||
size_t current_block_size; /* current block size (64KB initially) */
|
||||
size_t next_block_size; /* doubles if <10% recovered after GC */
|
||||
|
||||
/* Stone arena - permanent immutable allocations */
|
||||
uint8_t *stone_base; /* stone arena base */
|
||||
uint8_t *stone_free; /* stone arena bump pointer */
|
||||
uint8_t *stone_end; /* stone arena end */
|
||||
/* Constant text pool — compilation constants */
|
||||
uint8_t *ct_base; /* pool base */
|
||||
uint8_t *ct_free; /* pool bump pointer */
|
||||
uint8_t *ct_end; /* pool end */
|
||||
|
||||
/* Stone text intern table */
|
||||
void *st_pages; /* stone page list for large allocations */
|
||||
uint32_t *st_text_hash; /* hash table (slot -> id) */
|
||||
JSText **st_text_array; /* array of JSText pointers indexed by id */
|
||||
uint32_t st_text_size; /* hash table size (power of 2) */
|
||||
uint32_t st_text_count; /* number of interned texts */
|
||||
uint32_t st_text_resize; /* threshold for resize */
|
||||
/* Constant text intern table */
|
||||
void *ct_pages; /* page list for large allocations */
|
||||
uint32_t *ct_hash; /* hash table (slot -> id) */
|
||||
JSText **ct_array; /* array of JSText pointers indexed by id */
|
||||
uint32_t ct_size; /* hash table size (power of 2) */
|
||||
uint32_t ct_count; /* number of interned texts */
|
||||
uint32_t ct_resize_threshold; /* threshold for resize */
|
||||
|
||||
uint16_t binary_object_count;
|
||||
int binary_object_size;
|
||||
@@ -1102,6 +1085,7 @@ struct JSContext {
|
||||
|
||||
JSGCRef *top_gc_ref; /* used to reference temporary GC roots (stack top) */
|
||||
JSGCRef *last_gc_ref; /* used to reference temporary GC roots (list) */
|
||||
CCallRoot *c_call_root; /* stack of auto-rooted C call argv arrays */
|
||||
|
||||
int class_count; /* size of class_array and class_proto */
|
||||
JSClass *class_array;
|
||||
@@ -1161,22 +1145,22 @@ static inline const char *JS_KeyGetStr (JSContext *ctx, char *buf, size_t buf_si
|
||||
|
||||
|
||||
/* ============================================================
|
||||
Stone Arena Functions
|
||||
Constant Text Pool Functions
|
||||
============================================================ */
|
||||
|
||||
/* Stone page for large allocations */
|
||||
typedef struct StonePage {
|
||||
struct StonePage *next;
|
||||
/* Constant text page for large allocations */
|
||||
typedef struct CTPage {
|
||||
struct CTPage *next;
|
||||
size_t size;
|
||||
uint8_t data[];
|
||||
} StonePage;
|
||||
} CTPage;
|
||||
|
||||
/* Initial stone text table size */
|
||||
#define ST_TEXT_INITIAL_SIZE 256
|
||||
/* Initial constant text table size */
|
||||
#define CT_INITIAL_SIZE 256
|
||||
|
||||
/* Allocate from stone arena (permanent, immutable memory) */
|
||||
/* Allocate from constant text pool */
|
||||
|
||||
/* Resize the stone text intern hash table */
|
||||
/* Resize the constant text intern hash table */
|
||||
|
||||
/* Realloc with slack reporting (for bump allocator)
|
||||
WARNING: This function is NOT GC-safe! The caller must protect the source
|
||||
@@ -1192,9 +1176,9 @@ static int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size);
|
||||
/* JS_MarkValue - mark a value during GC traversal.
|
||||
With copying GC, this is a no-op as we discover live objects by tracing. */
|
||||
|
||||
/* Helper to check if a pointer is in stone memory */
|
||||
static inline int is_stone_ptr (JSContext *ctx, void *ptr) {
|
||||
return (uint8_t *)ptr >= ctx->stone_base && (uint8_t *)ptr < ctx->stone_end;
|
||||
/* Helper to check if a pointer is in constant text pool memory */
|
||||
static inline int is_ct_ptr (JSContext *ctx, void *ptr) {
|
||||
return (uint8_t *)ptr >= ctx->ct_base && (uint8_t *)ptr < ctx->ct_end;
|
||||
}
|
||||
|
||||
/* Intern a UTF-32 string as a stone text, returning a JSValue string */
|
||||
@@ -1234,7 +1218,7 @@ typedef struct JSRegExp {
|
||||
|
||||
/* Get prototype from object (works for both JSRecord and JSRecord since they
|
||||
* share layout) */
|
||||
#define JS_OBJ_GET_PROTO(p) ((JSRecord *)((JSRecord *)(p))->proto)
|
||||
#define JS_OBJ_GET_PROTO(p) (JS_IsNull(((JSRecord *)(p))->proto) ? NULL : (JSRecord *)JS_VALUE_GET_PTR(((JSRecord *)(p))->proto))
|
||||
|
||||
/* Initial capacity for new records (mask = 7, 8 slots total) */
|
||||
#define JS_RECORD_INITIAL_MASK 7
|
||||
@@ -1554,9 +1538,9 @@ JSText *pretext_concat_value (JSContext *ctx, JSText *s, JSValue v);
|
||||
JSValue js_new_blob (JSContext *ctx, blob *b);
|
||||
/* Functions from header region (defined in runtime.c) */
|
||||
void *js_realloc (JSContext *ctx, void *ptr, size_t size);
|
||||
void *st_alloc (JSContext *ctx, size_t bytes, size_t align);
|
||||
void st_free_all (JSContext *ctx);
|
||||
int st_text_resize (JSContext *ctx);
|
||||
void *ct_alloc (JSContext *ctx, size_t bytes, size_t align);
|
||||
void ct_free_all (JSContext *ctx);
|
||||
int ct_resize (JSContext *ctx);
|
||||
JSValue intern_text_to_value (JSContext *ctx, const uint32_t *utf32, uint32_t len);
|
||||
JSValue js_key_new (JSContext *ctx, const char *str);
|
||||
JSValue js_key_new_len (JSContext *ctx, const char *str, size_t len);
|
||||
|
||||
763
source/runtime.c
763
source/runtime.c
File diff suppressed because it is too large
Load Diff
384
streamline.cm
384
streamline.cm
@@ -35,13 +35,9 @@ var streamline = function(ir, log) {
|
||||
var T_FUNCTION = "function"
|
||||
var T_BLOB = "blob"
|
||||
|
||||
var int_result_ops = {
|
||||
add_int: true, sub_int: true, mul_int: true,
|
||||
div_int: true, mod_int: true
|
||||
}
|
||||
var float_result_ops = {
|
||||
add_float: true, sub_float: true, mul_float: true,
|
||||
div_float: true, mod_float: true
|
||||
var numeric_ops = {
|
||||
add: true, subtract: true, multiply: true,
|
||||
divide: true, modulo: true, pow: true
|
||||
}
|
||||
var bool_result_ops = {
|
||||
eq_int: true, ne_int: true, lt_int: true, gt_int: true,
|
||||
@@ -137,17 +133,13 @@ var streamline = function(ir, log) {
|
||||
} else if (op == "move") {
|
||||
src_type = slot_types[text(instr[2])]
|
||||
slot_types[text(instr[1])] = src_type != null ? src_type : T_UNKNOWN
|
||||
} else if (int_result_ops[op] == true) {
|
||||
slot_types[text(instr[1])] = T_INT
|
||||
} else if (float_result_ops[op] == true) {
|
||||
slot_types[text(instr[1])] = T_FLOAT
|
||||
} else if (op == "concat") {
|
||||
slot_types[text(instr[1])] = T_TEXT
|
||||
} else if (bool_result_ops[op] == true) {
|
||||
slot_types[text(instr[1])] = T_BOOL
|
||||
} else if (op == "load_field" || op == "load_index" || op == "load_dynamic") {
|
||||
slot_types[text(instr[1])] = T_UNKNOWN
|
||||
} else if (op == "invoke") {
|
||||
} else if (op == "invoke" || op == "tail_invoke") {
|
||||
slot_types[text(instr[2])] = T_UNKNOWN
|
||||
} else if (op == "pop" || op == "get") {
|
||||
slot_types[text(instr[1])] = T_UNKNOWN
|
||||
@@ -159,10 +151,8 @@ var streamline = function(ir, log) {
|
||||
slot_types[text(instr[1])] = T_FUNCTION
|
||||
} else if (op == "length") {
|
||||
slot_types[text(instr[1])] = T_INT
|
||||
} else if (op == "neg_int") {
|
||||
slot_types[text(instr[1])] = T_INT
|
||||
} else if (op == "neg_float") {
|
||||
slot_types[text(instr[1])] = T_FLOAT
|
||||
} else if (op == "negate" || numeric_ops[op] == true) {
|
||||
slot_types[text(instr[1])] = T_UNKNOWN
|
||||
} else if (op == "bitnot" || op == "bitand" || op == "bitor" ||
|
||||
op == "bitxor" || op == "shl" || op == "shr" || op == "ushr") {
|
||||
slot_types[text(instr[1])] = T_INT
|
||||
@@ -219,6 +209,16 @@ var streamline = function(ir, log) {
|
||||
return null
|
||||
}
|
||||
|
||||
// Copy every entry of write_types into slot_types, overwriting whatever
// slot_types already held under the same key.
//   slot_types  — table being seeded (mutated in place)
//   write_types — table of slot key -> type to copy from (not modified)
// Always returns null; the result is the mutation of slot_types.
var seed_writes = function(slot_types, write_types) {
  var slot_keys = array(write_types)
  var count = length(slot_keys)
  var idx = 0
  var key = null
  while (idx < count) {
    key = slot_keys[idx]
    slot_types[key] = write_types[key]
    idx = idx + 1
  }
  return null
}
|
||||
|
||||
// =========================================================
|
||||
// Pass: infer_param_types — backward type inference
|
||||
// Scans typed operators to infer immutable parameter types.
|
||||
@@ -246,24 +246,24 @@ var streamline = function(ir, log) {
|
||||
instr = instructions[i]
|
||||
if (is_array(instr)) {
|
||||
op = instr[0]
|
||||
if (op == "add_int" || op == "sub_int" || op == "mul_int" ||
|
||||
op == "div_int" || op == "mod_int" ||
|
||||
op == "eq_int" || op == "ne_int" || op == "lt_int" ||
|
||||
op == "gt_int" || op == "le_int" || op == "ge_int" ||
|
||||
op == "bitand" || op == "bitor" || op == "bitxor" ||
|
||||
op == "shl" || op == "shr" || op == "ushr") {
|
||||
if (op == "subtract" || op == "multiply" ||
|
||||
op == "divide" || op == "modulo" || op == "pow") {
|
||||
merge_backward(backward_types, instr[2], T_NUM)
|
||||
merge_backward(backward_types, instr[3], T_NUM)
|
||||
} else if (op == "negate") {
|
||||
merge_backward(backward_types, instr[2], T_NUM)
|
||||
} else if (op == "eq_int" || op == "ne_int" || op == "lt_int" ||
|
||||
op == "gt_int" || op == "le_int" || op == "ge_int" ||
|
||||
op == "bitand" || op == "bitor" || op == "bitxor" ||
|
||||
op == "shl" || op == "shr" || op == "ushr") {
|
||||
merge_backward(backward_types, instr[2], T_INT)
|
||||
merge_backward(backward_types, instr[3], T_INT)
|
||||
} else if (op == "neg_int" || op == "bitnot") {
|
||||
} else if (op == "bitnot") {
|
||||
merge_backward(backward_types, instr[2], T_INT)
|
||||
} else if (op == "add_float" || op == "sub_float" || op == "mul_float" ||
|
||||
op == "div_float" || op == "mod_float" ||
|
||||
op == "eq_float" || op == "ne_float" || op == "lt_float" ||
|
||||
} else if (op == "eq_float" || op == "ne_float" || op == "lt_float" ||
|
||||
op == "gt_float" || op == "le_float" || op == "ge_float") {
|
||||
merge_backward(backward_types, instr[2], T_FLOAT)
|
||||
merge_backward(backward_types, instr[3], T_FLOAT)
|
||||
} else if (op == "neg_float") {
|
||||
merge_backward(backward_types, instr[2], T_FLOAT)
|
||||
} else if (op == "concat" ||
|
||||
op == "eq_text" || op == "ne_text" || op == "lt_text" ||
|
||||
op == "gt_text" || op == "le_text" || op == "ge_text") {
|
||||
@@ -284,6 +284,13 @@ var streamline = function(ir, log) {
|
||||
merge_backward(backward_types, instr[1], T_RECORD)
|
||||
} else if (op == "push") {
|
||||
merge_backward(backward_types, instr[1], T_ARRAY)
|
||||
} else if (op == "load_index") {
|
||||
merge_backward(backward_types, instr[2], T_ARRAY)
|
||||
merge_backward(backward_types, instr[3], T_INT)
|
||||
} else if (op == "load_field") {
|
||||
merge_backward(backward_types, instr[2], T_RECORD)
|
||||
} else if (op == "pop") {
|
||||
merge_backward(backward_types, instr[2], T_ARRAY)
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
@@ -301,15 +308,131 @@ var streamline = function(ir, log) {
|
||||
return param_types
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// Pass: infer_slot_write_types — slot write-type invariance
|
||||
// Scans all instructions to find non-parameter slots where
|
||||
// every write produces the same type. These types persist
|
||||
// across label join points.
|
||||
// =========================================================
|
||||
// Scan every instruction of func and record, per destination slot, the type
// each recognised write produces. Writes are folded together with
// merge_backward; only slots numbered above nr_args (and above 0) are
// considered. The returned table keeps just the slots whose merged type is
// neither null nor T_UNKNOWN.
//   func — function IR; reads func.instructions and func.nr_args
//          (nr_args defaults to 0 when absent)
// Returns an empty table when func.instructions is null.
// NOTE(review): opcodes not matched below contribute nothing to the table;
// confirm that every opcode that writes a slot is covered here.
var infer_slot_write_types = function(func) {
  var instructions = func.instructions
  var nr_args = func.nr_args != null ? func.nr_args : 0
  var seen = null
  var stable = null
  var slot_keys = null
  var total = 0
  var key_count = 0
  var pos = 0
  var n = 0
  var ins = null
  var op = null
  var dest = 0
  var produced = null
  var merged = null

  if (instructions == null) {
    return {}
  }

  seen = {}
  total = length(instructions)
  while (pos < total) {
    ins = instructions[pos]
    pos = pos + 1

    // Non-array entries are skipped.
    if (is_array(ins)) {
      op = ins[0]
      // dest stays -1 for opcodes that are not recognised as slot writes.
      dest = -1
      produced = T_UNKNOWN

      if (op == "int" || op == "length" || op == "bitnot" ||
          op == "bitand" || op == "bitor" || op == "bitxor" ||
          op == "shl" || op == "shr" || op == "ushr") {
        dest = ins[1]
        produced = T_INT
      } else if (op == "null") {
        dest = ins[1]
        produced = T_NULL
      } else if (op == "access") {
        dest = ins[1]
        produced = access_value_type(ins[2])
        if (produced == null) {
          produced = T_UNKNOWN
        }
      } else if (op == "array") {
        dest = ins[1]
        produced = T_ARRAY
      } else if (op == "record") {
        dest = ins[1]
        produced = T_RECORD
      } else if (op == "function") {
        dest = ins[1]
        produced = T_FUNCTION
      } else if (op == "concat") {
        dest = ins[1]
        produced = T_TEXT
      } else if (op == "negate") {
        dest = ins[1]
      } else if (op == "true" || op == "false" ||
                 bool_result_ops[op] == true ||
                 op == "eq" || op == "ne" || op == "lt" ||
                 op == "le" || op == "gt" || op == "ge" || op == "in") {
        dest = ins[1]
        produced = T_BOOL
      } else if (op == "add" || op == "subtract" || op == "multiply" ||
                 op == "divide" || op == "modulo" || op == "pow" ||
                 op == "move" || op == "load_field" || op == "load_index" ||
                 op == "load_dynamic" || op == "pop" || op == "get") {
        dest = ins[1]
      } else if (op == "invoke" || op == "tail_invoke") {
        // Calls put their result slot in operand 2, not operand 1.
        dest = ins[2]
      }

      if (dest > 0 && dest > nr_args) {
        merge_backward(seen, dest, produced)
      }
    }
  }

  // Keep only the slots whose merged write type is actually known.
  stable = {}
  slot_keys = array(seen)
  key_count = length(slot_keys)
  while (n < key_count) {
    merged = seen[slot_keys[n]]
    if (merged != null && merged != T_UNKNOWN) {
      stable[slot_keys[n]] = merged
    }
    n = n + 1
  }
  return stable
}
|
||||
|
||||
// =========================================================
|
||||
// Pass: eliminate_type_checks — language-level type narrowing
|
||||
// Eliminates is_<type>/jump pairs when type is known.
|
||||
// Reduces load_dynamic/store_dynamic to field/index forms.
|
||||
// =========================================================
|
||||
var eliminate_type_checks = function(func, param_types, log) {
|
||||
var eliminate_type_checks = function(func, param_types, write_types, log) {
|
||||
var instructions = func.instructions
|
||||
var nr_args = func.nr_args != null ? func.nr_args : 0
|
||||
var has_params = false
|
||||
var has_writes = false
|
||||
var num_instr = 0
|
||||
var slot_types = null
|
||||
var nc = 0
|
||||
@@ -344,11 +467,15 @@ var streamline = function(ir, log) {
|
||||
}
|
||||
j = j + 1
|
||||
}
|
||||
has_writes = length(array(write_types)) > 0
|
||||
|
||||
slot_types = {}
|
||||
if (has_params) {
|
||||
seed_params(slot_types, param_types, nr_args)
|
||||
}
|
||||
if (has_writes) {
|
||||
seed_writes(slot_types, write_types)
|
||||
}
|
||||
|
||||
i = 0
|
||||
while (i < num_instr) {
|
||||
@@ -359,6 +486,9 @@ var streamline = function(ir, log) {
|
||||
if (has_params) {
|
||||
seed_params(slot_types, param_types, nr_args)
|
||||
}
|
||||
if (has_writes) {
|
||||
seed_writes(slot_types, write_types)
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
@@ -603,11 +733,8 @@ var streamline = function(ir, log) {
|
||||
var instr = null
|
||||
var op = null
|
||||
var ilen = 0
|
||||
var v2 = null
|
||||
var v3 = null
|
||||
var sv = null
|
||||
var events = null
|
||||
var rule = null
|
||||
|
||||
if (instructions == null || length(instructions) == 0) {
|
||||
return null
|
||||
@@ -655,179 +782,6 @@ var streamline = function(ir, log) {
|
||||
}
|
||||
}
|
||||
|
||||
// Integer: x+0, x-0 → move x
|
||||
if (op == "add_int" || op == "sub_int") {
|
||||
v3 = slot_values[text(instr[3])]
|
||||
if (v3 == 0) {
|
||||
rule = op == "add_int" ? "add_zero" : "sub_zero"
|
||||
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: rule, at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[3], value: 0}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if (op == "add_int") {
|
||||
v2 = slot_values[text(instr[2])]
|
||||
if (v2 == 0) {
|
||||
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "add_zero", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[2], value: 0}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if (op == "mul_int") {
|
||||
v3 = slot_values[text(instr[3])]
|
||||
v2 = slot_values[text(instr[2])]
|
||||
if (v3 == 1) {
|
||||
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "mul_one", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[3], value: 1}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if (v2 == 1) {
|
||||
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "mul_one", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[2], value: 1}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if (v3 == 0 || v2 == 0) {
|
||||
instructions[i] = ["int", instr[1], 0, instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "mul_zero", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {value: 0}
|
||||
}
|
||||
}
|
||||
slot_values[text(instr[1])] = 0
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
} else if (op == "div_int") {
|
||||
v3 = slot_values[text(instr[3])]
|
||||
if (v3 == 1) {
|
||||
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "div_one", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[3], value: 1}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Float: x+0, x-0 → move x; x*1, x/1 → move x
|
||||
// (skip mul_float * 0 — not safe with NaN/Inf)
|
||||
if (op == "add_float" || op == "sub_float") {
|
||||
v3 = slot_values[text(instr[3])]
|
||||
if (v3 == 0) {
|
||||
rule = op == "add_float" ? "add_zero" : "sub_zero"
|
||||
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: rule, at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[3], value: 0}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if (op == "add_float") {
|
||||
v2 = slot_values[text(instr[2])]
|
||||
if (v2 == 0) {
|
||||
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "add_zero", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[2], value: 0}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if (op == "mul_float") {
|
||||
v3 = slot_values[text(instr[3])]
|
||||
v2 = slot_values[text(instr[2])]
|
||||
if (v3 == 1) {
|
||||
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "mul_one", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[3], value: 1}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
if (v2 == 1) {
|
||||
instructions[i] = ["move", instr[1], instr[3], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "mul_one", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[2], value: 1}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
} else if (op == "div_float") {
|
||||
v3 = slot_values[text(instr[3])]
|
||||
if (v3 == 1) {
|
||||
instructions[i] = ["move", instr[1], instr[2], instr[ilen - 2], instr[ilen - 1]]
|
||||
if (events != null) {
|
||||
events[] = {
|
||||
event: "rewrite", pass: "simplify_algebra",
|
||||
rule: "div_one", at: i,
|
||||
before: instr, after: instructions[i],
|
||||
why: {slot: instr[3], value: 1}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Same-slot comparisons
|
||||
if (is_number(instr[2]) && instr[2] == instr[3]) {
|
||||
if (op == "eq_int" || op == "eq_float" || op == "eq_text" ||
|
||||
@@ -867,7 +821,7 @@ var streamline = function(ir, log) {
|
||||
}
|
||||
|
||||
// Clear value tracking for dest-producing ops (not reads-only)
|
||||
if (op == "invoke") {
|
||||
if (op == "invoke" || op == "tail_invoke") {
|
||||
slot_values[text(instr[2])] = null
|
||||
} else if (op != "int" && op != "access" && op != "true" &&
|
||||
op != "false" && op != "move" && op != "null" &&
|
||||
@@ -1050,7 +1004,7 @@ var streamline = function(ir, log) {
|
||||
if (after_return) {
|
||||
nc = nc + 1
|
||||
instructions[i] = "_nop_ur_" + text(nc)
|
||||
} else if (instr[0] == "return" || instr[0] == "disrupt") {
|
||||
} else if (instr[0] == "return") {
|
||||
after_return = true
|
||||
}
|
||||
}
|
||||
@@ -1123,6 +1077,7 @@ var streamline = function(ir, log) {
|
||||
// =========================================================
|
||||
var optimize_function = function(func, log) {
|
||||
var param_types = null
|
||||
var write_types = null
|
||||
var slot_types = null
|
||||
if (func.instructions == null || length(func.instructions) == 0) {
|
||||
return null
|
||||
@@ -1132,8 +1087,13 @@ var streamline = function(ir, log) {
|
||||
return param_types
|
||||
})
|
||||
if (verify_fn) verify_fn(func, "after infer_param_types")
|
||||
run_pass(func, "infer_slot_write_types", function() {
|
||||
write_types = infer_slot_write_types(func)
|
||||
return write_types
|
||||
})
|
||||
if (verify_fn) verify_fn(func, "after infer_slot_write_types")
|
||||
run_pass(func, "eliminate_type_checks", function() {
|
||||
slot_types = eliminate_type_checks(func, param_types, log)
|
||||
slot_types = eliminate_type_checks(func, param_types, write_types, log)
|
||||
return slot_types
|
||||
})
|
||||
if (verify_fn) verify_fn(func, "after eliminate_type_checks")
|
||||
@@ -1156,10 +1116,10 @@ var streamline = function(ir, log) {
|
||||
return eliminate_moves(func, log)
|
||||
})
|
||||
if (verify_fn) verify_fn(func, "after eliminate_moves")
|
||||
// NOTE: eliminate_unreachable is disabled because disruption handler
|
||||
// code is placed after return/disrupt without label boundaries.
|
||||
// Re-enable once mcode.cm emits labels for handler entry points.
|
||||
//eliminate_unreachable(func)
|
||||
run_pass(func, "eliminate_unreachable", function() {
|
||||
return eliminate_unreachable(func)
|
||||
})
|
||||
if (verify_fn) verify_fn(func, "after eliminate_unreachable")
|
||||
run_pass(func, "eliminate_dead_jumps", function() {
|
||||
return eliminate_dead_jumps(func, log)
|
||||
})
|
||||
|
||||
29
verify_ir.cm
29
verify_ir.cm
@@ -18,8 +18,7 @@ var slot_positions = {
|
||||
// Unary — dest, src
|
||||
move: [0, 1],
|
||||
not: [0, 1],
|
||||
neg_int: [0, 1],
|
||||
neg_float: [0, 1],
|
||||
negate: [0, 1],
|
||||
bitnot: [0, 1],
|
||||
length: [0, 1],
|
||||
typeof: [0, 1],
|
||||
@@ -41,16 +40,6 @@ var slot_positions = {
|
||||
divide: [0, 1, 2],
|
||||
modulo: [0, 1, 2],
|
||||
pow: [0, 1, 2],
|
||||
add_int: [0, 1, 2],
|
||||
sub_int: [0, 1, 2],
|
||||
mul_int: [0, 1, 2],
|
||||
div_int: [0, 1, 2],
|
||||
mod_int: [0, 1, 2],
|
||||
add_float: [0, 1, 2],
|
||||
sub_float: [0, 1, 2],
|
||||
mul_float: [0, 1, 2],
|
||||
div_float: [0, 1, 2],
|
||||
mod_float: [0, 1, 2],
|
||||
eq: [0, 1, 2],
|
||||
ne: [0, 1, 2],
|
||||
lt: [0, 1, 2],
|
||||
@@ -113,6 +102,7 @@ var slot_positions = {
|
||||
|
||||
// Invoke
|
||||
invoke: [0, 1],
|
||||
tail_invoke: [0, 1],
|
||||
goinvoke: [0],
|
||||
frame: [0, 1],
|
||||
setarg: [0, 2]
|
||||
@@ -122,15 +112,13 @@ var slot_positions = {
|
||||
var writes_dest = {
|
||||
access: true, int: true, true: true, false: true, null: true,
|
||||
function: true, array: true, record: true,
|
||||
move: true, not: true, neg_int: true, neg_float: true, bitnot: true,
|
||||
move: true, not: true, negate: true, bitnot: true,
|
||||
length: true, typeof: true,
|
||||
is_int: true, is_text: true, is_num: true,
|
||||
is_bool: true, is_null: true, is_array: true,
|
||||
is_func: true, is_record: true, is_stone: true, is_identical: true,
|
||||
add: true, subtract: true, multiply: true, divide: true,
|
||||
modulo: true, pow: true,
|
||||
add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
|
||||
add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
|
||||
eq: true, ne: true, lt: true, le: true, gt: true, ge: true,
|
||||
eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
|
||||
eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
|
||||
@@ -141,7 +129,8 @@ var writes_dest = {
|
||||
in: true,
|
||||
load_index: true, load_dynamic: true, load_field: true,
|
||||
pop: true, get: true,
|
||||
invoke: true
|
||||
invoke: true,
|
||||
tail_invoke: true
|
||||
}
|
||||
|
||||
// Opcodes where invoke writes to position 1 (result slot), not position 0
|
||||
@@ -260,14 +249,10 @@ var check_type_consistency = function(func) {
|
||||
var T_BOOL = "bool"
|
||||
|
||||
var int_ops = {
|
||||
add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
|
||||
eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
|
||||
neg_int: true
|
||||
eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true
|
||||
}
|
||||
var float_ops = {
|
||||
add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
|
||||
eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
|
||||
neg_float: true
|
||||
eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true
|
||||
}
|
||||
var text_ops = {
|
||||
eq_text: true, ne_text: true, lt_text: true, gt_text: true, le_text: true, ge_text: true,
|
||||
|
||||
Reference in New Issue
Block a user